Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
81bb809d
Commit
81bb809d
authored
Nov 12, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update scripts
parent
7f26b844
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
225 additions
and
0 deletions
+225
-0
pom.xml
webmagic-scripts/pom.xml
+57
-0
ScriptConsole.java
...ain/java/us/codecraft/webmagic/scripts/ScriptConsole.java
+147
-0
log4j.xml
webmagic-scripts/src/main/resources/log4j.xml
+21
-0
No files found.
webmagic-scripts/pom.xml
View file @
81bb809d
...
@@ -18,6 +18,11 @@
...
@@ -18,6 +18,11 @@
<artifactId>
jruby
</artifactId>
<artifactId>
jruby
</artifactId>
<version>
1.7.6
</version>
<version>
1.7.6
</version>
</dependency>
</dependency>
<dependency>
<groupId>
commons-cli
</groupId>
<artifactId>
commons-cli
</artifactId>
<version>
1.2
</version>
</dependency>
<dependency>
<dependency>
<groupId>
junit
</groupId>
<groupId>
junit
</groupId>
<artifactId>
junit
</artifactId>
<artifactId>
junit
</artifactId>
...
@@ -30,5 +35,57 @@
...
@@ -30,5 +35,57 @@
</dependency>
</dependency>
</dependencies>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-dependency-plugin
</artifactId>
<executions>
<execution>
<id>
copy-dependencies
</id>
<phase>
package
</phase>
<goals>
<goal>
copy-dependencies
</goal>
</goals>
<configuration>
<outputDirectory>
${project.build.directory}/lib
</outputDirectory>
<overWriteReleases>
false
</overWriteReleases>
<overWriteSnapshots>
false
</overWriteSnapshots>
<overWriteIfNewer>
true
</overWriteIfNewer>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>
maven-compiler-plugin
</artifactId>
<configuration>
<source>
1.6
</source>
<target>
1.6
</target>
<encoding>
UTF-8
</encoding>
</configuration>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-resources-plugin
</artifactId>
<configuration>
<encoding>
UTF-8
</encoding>
</configuration>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-jar-plugin
</artifactId>
<configuration>
<archive>
<manifest>
<addClasspath>
true
</addClasspath>
<classpathPrefix>
./lib/
</classpathPrefix>
<mainClass>
us.codecraft.webmagic.scripts.ScriptConsole
</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
</project>
\ No newline at end of file
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
0 → 100644
View file @
81bb809d
package
us
.
codecraft
.
webmagic
.
scripts
;
import
com.google.common.collect.Sets
;
import
org.apache.commons.cli.*
;
import
us.codecraft.webmagic.Spider
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Set
;
/**
 * Command-line launcher that runs a {@link Spider} whose page-processing logic
 * comes from a script file.
 *
 * <p>Recognised options:
 * <ul>
 *   <li>{@code -l language} - script language alias (optional, defaults to JavaScript)</li>
 *   <li>{@code -f file} - script file to load (required)</li>
 *   <li>{@code -t n} - value passed to {@code Spider.thread(int)} (optional, default 1)</li>
 *   <li>{@code -s n} - value passed to {@code Site.setSleepTime(int)} (optional, default 1000)</li>
 * </ul>
 * Every remaining argument is treated as a start URL; at least one is required.
 * Any command-line problem prints the usage text and terminates the JVM with
 * exit status -1.
 *
 * @author code4crafter@gmail.com
 * @since 0.4.1
 */
public class ScriptConsole {

    /** Usage text printed to stdout whenever the command line is rejected. */
    private static final String USAGE =
            "Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]";

    /**
     * Holder for the parsed command-line settings, pre-populated with the defaults
     * used when an option is omitted.
     */
    private static class Params {

        /** Script language; JavaScript unless {@code -l} names a known alias. */
        Language language = Language.JavaScript;

        /** Script file given with {@code -f}. */
        String scriptFileName;

        /** Start URLs (the non-option arguments). */
        List<String> urls;

        /** Value handed to {@code Spider.thread(int)}. */
        int thread = 1;

        /** Value handed to {@code Site.setSleepTime(int)}. */
        int sleepTime = 1000;

        /** Accepted {@code -l} spellings for each supported language. */
        private static final Map<Language, Set<String>> alias = new HashMap<Language, Set<String>>();

        static {
            alias.put(Language.JavaScript, Sets.<String>newHashSet("js", "javascript", "JavaScript", "JS"));
            alias.put(Language.JRuby, Sets.<String>newHashSet("ruby", "jruby", "Ruby", "JRuby"));
        }

        /**
         * Selects the language whose alias set contains {@code arg}.
         * Matching is exact (case-sensitive); when no alias matches, the current
         * language is left unchanged.
         *
         * @param arg value supplied with {@code -l}
         */
        public void setLanguagefromArg(String arg) {
            for (Map.Entry<Language, Set<String>> languageSetEntry : alias.entrySet()) {
                if (languageSetEntry.getValue().contains(arg)) {
                    this.language = languageSetEntry.getKey();
                    return;
                }
            }
        }

        private Language getLanguage() {
            return language;
        }

        private void setLanguage(Language language) {
            this.language = language;
        }

        private String getScriptFileName() {
            return scriptFileName;
        }

        private void setScriptFileName(String scriptFileName) {
            this.scriptFileName = scriptFileName;
        }

        private List<String> getUrls() {
            return urls;
        }

        private void setUrls(List<String> urls) {
            this.urls = urls;
        }

        private int getThread() {
            return thread;
        }

        private void setThread(int thread) {
            this.thread = thread;
        }

        private int getSleepTime() {
            return sleepTime;
        }

        private void setSleepTime(int sleepTime) {
            this.sleepTime = sleepTime;
        }
    }

    /**
     * Parses the command line and runs the spider.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        Params params = parseCommand(args);
        startSpider(params);
    }

    /**
     * Builds the script-backed page processor, configures the spider from
     * {@code params}, queues every start URL and runs the spider.
     * Prints the usage text and exits with status -1 when no URL was supplied.
     *
     * @param params parsed command-line settings
     */
    private static void startSpider(Params params) {
        // Validate first so a bad command line is reported before the script file is loaded.
        if (params.getUrls() == null || params.getUrls().size() == 0) {
            System.err.println("Need at least one argument");
            System.out.println(USAGE);
            System.exit(-1);
        }
        ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
                .language(params.getLanguage())
                .scriptFromFile(params.getScriptFileName())
                .build();
        pageProcessor.getSite().setSleepTime(params.getSleepTime());
        Spider spider = Spider.create(pageProcessor).thread(params.getThread());
        for (String url : params.getUrls()) {
            spider.addUrl(url);
        }
        spider.run();
    }

    /**
     * Declares the supported options, parses {@code args} and converts the
     * result into a {@link Params}.
     * Any failure (unknown option, missing option value, non-numeric
     * {@code -t}/{@code -s}) prints the stack trace and the usage text, then
     * exits with status -1.
     *
     * @param args raw command-line arguments
     * @return the parsed settings
     */
    private static Params parseCommand(String[] args) {
        try {
            Options options = new Options();
            options.addOption(new Option("l", true, "language"));
            options.addOption(new Option("t", true, "thread"));
            options.addOption(new Option("f", true, "script file"));
            // "-s" is advertised in the usage text and read in readOptions, so it
            // must be declared here or the parser rejects it as unrecognized.
            options.addOption(new Option("s", true, "sleep time"));
            CommandLineParser commandLineParser = new PosixParser();
            CommandLine commandLine = commandLineParser.parse(options, args);
            return readOptions(commandLine);
        } catch (Exception e) {
            e.printStackTrace();
            exit();
            // Only here to satisfy the compiler; exit() terminates the JVM.
            return null;
        }
    }

    /**
     * Reports a malformed command line, prints the usage text and terminates
     * the JVM with exit status -1.
     */
    private static void exit() {
        System.err.println("Format error");
        System.out.println(USAGE);
        System.exit(-1);
    }

    /**
     * Copies the parsed option values into a new {@link Params}, keeping the
     * defaults for options that were not supplied. A missing {@code -f}
     * triggers {@link #exit()}.
     *
     * @param commandLine parsed command line
     * @return the populated settings
     * @throws NumberFormatException if the {@code -s} or {@code -t} value is not an integer
     */
    private static Params readOptions(CommandLine commandLine) {
        Params params = new Params();
        if (commandLine.hasOption("l")) {
            String language = commandLine.getOptionValue("l");
            params.setLanguagefromArg(language);
        }
        if (commandLine.hasOption("f")) {
            String scriptFilename = commandLine.getOptionValue("f");
            params.setScriptFileName(scriptFilename);
        } else {
            exit();
        }
        if (commandLine.hasOption("s")) {
            int sleepTime = Integer.parseInt(commandLine.getOptionValue("s"));
            params.setSleepTime(sleepTime);
        }
        if (commandLine.hasOption("t")) {
            int thread = Integer.parseInt(commandLine.getOptionValue("t"));
            params.setThread(thread);
        }
        // Commons CLI 1.2 declares getArgList() with a raw List; the leftover
        // arguments are always Strings, so the unchecked conversion is safe.
        @SuppressWarnings("unchecked")
        List<String> urls = commandLine.getArgList();
        params.setUrls(urls);
        return params;
    }
}
webmagic-scripts/src/main/resources/log4j.xml
0 → 100644
View file @
81bb809d
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!-- log4j configuration for the webmagic-scripts module: a single console appender shared by all loggers. -->
<log4j:configuration
xmlns:log4j=
"http://jakarta.apache.org/log4j/"
>
<!-- Console appender "stdout"; each line is: timestamp, level, logger name, (source file:line), "##", message. -->
<appender
name=
"stdout"
class=
"org.apache.log4j.ConsoleAppender"
>
<layout
class=
"org.apache.log4j.PatternLayout"
>
<param
name=
"ConversionPattern"
value=
"%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n"
/>
</layout>
</appender>
<!-- Loggers under "org.apache" are limited to warn and above; additivity is off, so their events are not also passed to the root logger's appenders. -->
<logger
name=
"org.apache"
additivity=
"false"
>
<level
value=
"warn"
/>
<appender-ref
ref=
"stdout"
/>
</logger>
<!-- Everything else logs at info and above to the same console appender. -->
<root>
<level
value=
"info"
/>
<appender-ref
ref=
"stdout"
/>
</root>
</log4j:configuration>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment