Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
44794282
Commit
44794282
authored
Nov 12, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add multithread support
parent
b5f2498c
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
92 additions
and
19 deletions
+92
-19
deploy.sh
webmagic-scripts/deploy.sh
+1
-1
ScriptConsole.java
...ain/java/us/codecraft/webmagic/scripts/ScriptConsole.java
+29
-5
ScriptEnginePool.java
.../java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
+39
-0
ScriptProcessor.java
...n/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
+14
-11
ScriptProcessorBuilder.java
...us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
+8
-1
log4j.xml
webmagic-scripts/src/main/resources/log4j.xml
+1
-1
No files found.
webmagic-scripts/deploy.sh
View file @
44794282
#!/bin/sh
# Builds the webmagic-scripts module and installs the console jar and its
# dependency libraries under /usr/local/webmagic.

# Abort on the first failing command so a broken build never overwrites the
# installed jar or prunes the installed lib directory.
set -e

VERSION="0.4.1-SNAPSHOT"

mvn clean package
cp "target/webmagic-scripts-${VERSION}.jar" /usr/local/webmagic/webmagic-console.jar
# --delete removes libraries from the install dir that are no longer in target/lib.
rsync -avz --delete target/lib/ /usr/local/webmagic/lib/
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
View file @
44794282
...
...
@@ -2,6 +2,8 @@ package us.codecraft.webmagic.scripts;
import
com.google.common.collect.Sets
;
import
org.apache.commons.cli.*
;
import
org.apache.log4j.Level
;
import
org.apache.log4j.Logger
;
import
us.codecraft.webmagic.Spider
;
import
java.util.HashMap
;
...
...
@@ -85,7 +87,7 @@ public class ScriptConsole {
private
static
void
startSpider
(
Params
params
)
{
ScriptProcessor
pageProcessor
=
ScriptProcessorBuilder
.
custom
()
.
language
(
params
.
getLanguage
()).
scriptFromFile
(
params
.
getScriptFileName
()).
build
();
.
language
(
params
.
getLanguage
()).
scriptFromFile
(
params
.
getScriptFileName
()).
thread
(
params
.
getThread
()).
build
();
pageProcessor
.
getSite
().
setSleepTime
(
params
.
getSleepTime
());
pageProcessor
.
getSite
().
setAcceptStatCode
(
Sets
.<
Integer
>
newHashSet
(
200
,
404
,
500
));
Spider
spider
=
Spider
.
create
(
pageProcessor
).
thread
(
params
.
getThread
());
...
...
@@ -100,13 +102,15 @@ public class ScriptConsole {
spider
.
run
();
}
private
static
Params
parseCommand
(
String
[]
args
)
{
try
{
Options
options
=
new
Options
();
options
.
addOption
(
new
Option
(
"l"
,
true
,
"language"
));
options
.
addOption
(
new
Option
(
"t"
,
true
,
"thread"
));
options
.
addOption
(
new
Option
(
"f"
,
true
,
"script file"
));
options
.
addOption
(
new
Option
(
"s"
,
true
,
"sleep time"
));
options
.
addOption
(
new
Option
(
"l"
,
"language"
,
true
,
"language"
));
options
.
addOption
(
new
Option
(
"t"
,
"thread"
,
true
,
"thread"
));
options
.
addOption
(
new
Option
(
"f"
,
"file"
,
true
,
"script file"
));
options
.
addOption
(
new
Option
(
"s"
,
"sleep"
,
true
,
"sleep time"
));
options
.
addOption
(
new
Option
(
"g"
,
"logger"
,
true
,
"sleep time"
));
CommandLineParser
commandLineParser
=
new
PosixParser
();
CommandLine
commandLine
=
commandLineParser
.
parse
(
options
,
args
);
return
readOptions
(
commandLine
);
...
...
@@ -143,7 +147,27 @@ public class ScriptConsole {
Integer
thread
=
Integer
.
parseInt
(
commandLine
.
getOptionValue
(
"t"
));
params
.
setThread
(
thread
);
}
if
(
commandLine
.
hasOption
(
"g"
))
{
configLogger
(
commandLine
.
getOptionValue
(
"g"
));
}
params
.
setUrls
(
commandLine
.
getArgList
());
return
params
;
}
/**
 * Sets the level of the log4j root logger from a level name.
 *
 * <p>Matching is delegated to {@link Level#toLevel(String, Level)}, so every
 * standard log4j level name is accepted case-insensitively (the previously
 * supported debug/info/warn/trace/off/error as well as fatal and all).
 * A {@code null} or unrecognised name leaves the current level unchanged and
 * is reported with a warning instead of being silently ignored.
 *
 * @param value level name, e.g. the argument of the {@code -g} option
 */
private static void configLogger(String value) {
    Logger rootLogger = Logger.getRootLogger();
    // A null default makes an unknown name distinguishable from a valid one.
    Level level = Level.toLevel(value, null);
    if (level == null) {
        rootLogger.warn("Unknown log level '" + value + "', keeping current level");
        return;
    }
    rootLogger.setLevel(level);
}
}
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
0 → 100644
View file @
44794282
package
us
.
codecraft
.
webmagic
.
scripts
;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngineManager
;
import
java.util.concurrent.LinkedBlockingQueue
;
import
java.util.concurrent.atomic.AtomicInteger
;
/**
 * Fixed-size pool of {@link ScriptEngine} instances.
 *
 * <p>All engines are created eagerly by the constructor. A caller borrows an
 * engine with {@link #getEngine()} and must hand it back with
 * {@link #release(ScriptEngine)}, normally from a {@code finally} block;
 * an engine that is never released permanently shrinks the pool.
 *
 * @author code4crafter@gmail.com
 * @since 0.4.1
 */
public class ScriptEnginePool {

    /** Number of engines the pool was created with. */
    private final int size;

    /** Bookkeeping counter of engines currently sitting in the queue. */
    private final AtomicInteger availableCount;

    private final LinkedBlockingQueue<ScriptEngine> scriptEngines = new LinkedBlockingQueue<ScriptEngine>();

    /**
     * Creates the pool and fills it with {@code size} engines for the given language.
     *
     * @param language language whose engine name is used to look up the engine
     * @param size     number of engines to create; must be at least 1
     * @throws IllegalArgumentException if {@code language} is null or {@code size} is less than 1
     * @throws IllegalStateException    if no script engine is registered under the language's engine name
     */
    public ScriptEnginePool(Language language, int size) {
        if (language == null) {
            throw new IllegalArgumentException("language must not be null!");
        }
        if (size < 1) {
            // An empty pool could never serve getEngine(), so fail fast here.
            throw new IllegalArgumentException("size must be at least 1, but was " + size);
        }
        this.size = size;
        this.availableCount = new AtomicInteger(size);
        for (int i = 0; i < size; i++) {
            ScriptEngineManager manager = new ScriptEngineManager();
            ScriptEngine engine = manager.getEngineByName(language.getEngineName());
            if (engine == null) {
                // getEngineByName returns null when nothing is registered under that name;
                // report it clearly instead of letting the queue reject a null element.
                throw new IllegalStateException("No script engine found for name: " + language.getEngineName());
            }
            scriptEngines.add(engine);
        }
    }

    /**
     * Borrows an engine, waiting until one is available if all are in use.
     *
     * @return a pooled engine, never {@code null}
     * @throws IllegalStateException if the calling thread is interrupted while waiting;
     *                               the thread's interrupt flag is restored first
     */
    public ScriptEngine getEngine() {
        try {
            // take() blocks on an empty queue, whereas poll() would return null.
            ScriptEngine engine = scriptEngines.take();
            availableCount.decrementAndGet();
            return engine;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while waiting for a script engine", e);
        }
    }

    /**
     * Returns a previously borrowed engine to the pool.
     *
     * @param scriptEngine the engine obtained from {@link #getEngine()}
     * @throws NullPointerException if {@code scriptEngine} is null
     */
    public void release(ScriptEngine scriptEngine) {
        // add() rejects null before the counter is touched, keeping the count consistent.
        scriptEngines.add(scriptEngine);
        availableCount.incrementAndGet();
    }
}
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
View file @
44794282
...
...
@@ -7,7 +7,6 @@ import us.codecraft.webmagic.processor.PageProcessor;
import
javax.script.ScriptContext
;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngineManager
;
import
javax.script.ScriptException
;
import
java.io.IOException
;
import
java.io.InputStream
;
...
...
@@ -18,7 +17,7 @@ import java.io.InputStream;
*/
public
class
ScriptProcessor
implements
PageProcessor
{
private
ScriptEngine
engine
;
private
ScriptEngine
Pool
enginePool
;
private
String
defines
;
...
...
@@ -28,13 +27,12 @@ public class ScriptProcessor implements PageProcessor {
private
Site
site
=
Site
.
me
();
public
ScriptProcessor
(
Language
language
,
String
script
)
{
public
ScriptProcessor
(
Language
language
,
String
script
,
int
threadNum
)
{
if
(
language
==
null
||
script
==
null
)
{
throw
new
IllegalArgumentException
(
"language and script must not be null!"
);
}
this
.
language
=
language
;
ScriptEngineManager
manager
=
new
ScriptEngineManager
();
engine
=
manager
.
getEngineByName
(
language
.
getEngineName
());
enginePool
=
new
ScriptEnginePool
(
language
,
threadNum
);
InputStream
resourceAsStream
=
this
.
getClass
().
getClassLoader
().
getResourceAsStream
(
language
.
getDefineFile
());
try
{
defines
=
IOUtils
.
toString
(
resourceAsStream
);
...
...
@@ -46,6 +44,8 @@ public class ScriptProcessor implements PageProcessor {
@Override
public
void
process
(
Page
page
)
{
ScriptEngine
engine
=
enginePool
.
getEngine
();
try
{
ScriptContext
context
=
engine
.
getContext
();
context
.
setAttribute
(
"page"
,
page
,
ScriptContext
.
ENGINE_SCOPE
);
context
.
setAttribute
(
"config"
,
site
,
ScriptContext
.
ENGINE_SCOPE
);
...
...
@@ -67,6 +67,9 @@ public class ScriptProcessor implements PageProcessor {
}
catch
(
ScriptException
e
)
{
e
.
printStackTrace
();
}
}
finally
{
enginePool
.
release
(
engine
);
}
}
@Override
...
...
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
View file @
44794282
...
...
@@ -18,6 +18,8 @@ public class ScriptProcessorBuilder {
private
String
script
;
private
int
threadNum
=
1
;
private
ScriptProcessorBuilder
()
{
}
...
...
@@ -57,8 +59,13 @@ public class ScriptProcessorBuilder {
return
this
;
}
/**
 * Records the thread count to use for the processor being built.
 *
 * @param threadNum value stored in this builder's {@code threadNum} field
 * @return this builder, to allow call chaining
 */
public ScriptProcessorBuilder thread(int threadNum) {
    this.threadNum = threadNum;
    return this;
}
/**
 * Creates the {@link ScriptProcessor} from the language, script and thread
 * count configured on this builder.
 *
 * @return a new processor instance
 */
public ScriptProcessor build() {
    // Only the three-argument form forwards threadNum; the stale two-argument
    // return that preceded it made this statement unreachable.
    return new ScriptProcessor(language, script, threadNum);
}
}
webmagic-scripts/src/main/resources/log4j.xml
View file @
44794282
...
...
@@ -9,7 +9,7 @@
</appender>
<logger
name=
"org.apache"
additivity=
"false"
>
<level
value=
"
warn
"
/>
<level
value=
"
error
"
/>
<appender-ref
ref=
"stdout"
/>
</logger>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment