Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
bc5c30de
Commit
bc5c30de
authored
Nov 12, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update scripts
parent
59f67b1e
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
253 additions
and
31 deletions
+253
-31
pom.xml
pom.xml
+1
-0
log4j.xml
webmagic-core/src/main/resources/log4j.xml
+0
-10
JsScriptProcessor.java
...a/us/codecraft/webmagic/javascript/JsScriptProcessor.java
+82
-0
RubyScriptProcessor.java
...java/us/codecraft/webmagic/jruby/RubyScriptProcessor.java
+3
-2
Language.java
...src/main/java/us/codecraft/webmagic/scripts/Language.java
+35
-0
ScriptProcessor.java
...n/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
+22
-19
ScriptProcessorBuilder.java
...us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
+64
-0
ScriptProcessorTest.java
...va/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
+25
-0
log4j.xml
webmagic-scripts/src/test/resouces/log4j.xml
+21
-0
No files found.
pom.xml
View file @
bc5c30de
...
...
@@ -48,6 +48,7 @@
<modules>
<module>
webmagic-core
</module>
<module>
webmagic-extension/
</module>
<module>
webmagic-scripts/
</module>
</modules>
<dependencyManagement>
...
...
webmagic-core/src/main/resources/log4j.xml
View file @
bc5c30de
...
...
@@ -8,21 +8,11 @@
</layout>
</appender>
<logger
name=
"org.springframework"
additivity=
"false"
>
<level
value=
"warn"
/>
<appender-ref
ref=
"stdout"
/>
</logger>
<logger
name=
"org.apache"
additivity=
"false"
>
<level
value=
"warn"
/>
<appender-ref
ref=
"stdout"
/>
</logger>
<logger
name=
"net.sf.ehcache"
additivity=
"false"
>
<level
value=
"warn"
/>
<appender-ref
ref=
"stdout"
/>
</logger>
<root>
<level
value=
"info"
/>
<appender-ref
ref=
"stdout"
/>
...
...
webmagic-scripts/src/main/java/us/codecraft/webmagic/javascript/JsScriptProcessor.java
0 → 100644
View file @
bc5c30de
package
us
.
codecraft
.
webmagic
.
javascript
;
import
org.apache.commons.io.IOUtils
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.processor.PageProcessor
;
import
us.codecraft.webmagic.scripts.ScriptProcessor
;
import
us.codecraft.webmagic.scripts.ScriptProcessorBuilder
;
import
javax.script.ScriptContext
;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngineManager
;
import
javax.script.ScriptException
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.io.InputStream
;
/**
* @author code4crafter@gmail.com
* @since 0.4.1
*/
/**
 * {@link PageProcessor} that handles each page by evaluating a JavaScript
 * snippet through the {@code javax.script} API.
 *
 * <p>The contents of the classpath resource {@code js/defines.js} are prepended
 * to the user script on every evaluation, and the current {@link Page} is
 * exposed to the script as the engine-scope attribute {@code page}.
 *
 * @author code4crafter@gmail.com
 * @since 0.4.1
 */
public class JsScriptProcessor implements PageProcessor {

    /** Classpath location of the definitions prepended to every script. */
    private static final String DEFINES_RESOURCE = "js/defines.js";

    private final ScriptEngine engine;

    private final String defines;

    private final String script;

    // Held as a field, like the sibling ScriptProcessor does, so that settings
    // applied through getSite() are kept between calls.
    // NOTE(review): assumes Site.me() builds a new Site on each call - confirm.
    private final Site site = Site.me();

    /**
     * Creates a processor for the given script source.
     *
     * @param script JavaScript source evaluated for every page
     * @throws IOException if {@code js/defines.js} is missing from the classpath
     *                     or cannot be read
     * @throws IllegalStateException if no script engine named "javascript" is
     *                               registered in this JVM
     */
    JsScriptProcessor(String script) throws IOException {
        ScriptEngineManager manager = new ScriptEngineManager();
        ScriptEngine jsEngine = manager.getEngineByName("javascript");
        if (jsEngine == null) {
            // getEngineByName returns null when nothing matches; fail here
            // instead of with a NullPointerException inside process().
            throw new IllegalStateException("no script engine named \"javascript\" is available");
        }
        engine = jsEngine;
        InputStream definesStream = this.getClass().getClassLoader().getResourceAsStream(DEFINES_RESOURCE);
        defines = readAndClose(definesStream, DEFINES_RESOURCE);
        this.script = script;
    }

    /**
     * Builds a processor from a script stored on the file system.
     *
     * @param fileName path of the script file
     * @return a processor that evaluates the file's contents
     * @throws IllegalArgumentException if the file (or the defines resource)
     *                                  cannot be read; the IOException is the cause
     */
    public static JsScriptProcessor fromFile(String fileName) {
        try {
            return new JsScriptProcessor(readAndClose(new FileInputStream(fileName), fileName));
        } catch (IOException e) {
            //wrap IOException because I prefer a runtime exception...
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Builds a processor from a script stored on the classpath.
     *
     * @param fileName classpath-relative name of the script resource
     * @return a processor that evaluates the resource's contents
     * @throws IllegalArgumentException if the resource (or the defines resource)
     *                                  is missing or cannot be read
     */
    public static JsScriptProcessor fromClassPathFile(String fileName) {
        try {
            InputStream scriptStream = JsScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
            return new JsScriptProcessor(readAndClose(scriptStream, fileName));
        } catch (IOException e) {
            //wrap IOException because I prefer a runtime exception...
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Reads the whole stream into a string and always closes it.
     *
     * @param in     stream to drain; {@code null} means the resource was not found
     * @param source name used in the error message
     * @throws IOException if {@code in} is {@code null} or reading fails
     */
    private static String readAndClose(InputStream in, String source) throws IOException {
        if (in == null) {
            // getResourceAsStream signals "not found" with null.
            throw new IOException("script resource not found: " + source);
        }
        try {
            return IOUtils.toString(in);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Evaluates the defines followed by the user script with {@code page}
     * bound in the engine scope. A {@link ScriptException} is printed and
     * otherwise ignored, so a failing script does not propagate to the caller.
     */
    @Override
    public void process(Page page) {
        ScriptContext context = engine.getContext();
        context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
        try {
            engine.eval(defines + script, context);
        } catch (ScriptException e) {
            // Best effort: report the script failure and carry on.
            e.printStackTrace();
        }
    }

    /** Returns the site configuration owned by this processor. */
    @Override
    public Site getSite() {
        return site;
    }

    /** Demo entry point: runs the bundled {@code js/oschina.js} script against one blog URL. */
    public static void main(String[] args) {
        ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().scriptFromClassPathFile("js/oschina.js").build();
        Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").run();
    }
}
webmagic-scripts/src/main/java/us/codecraft/webmagic/
processor
/RubyScriptProcessor.java
→
webmagic-scripts/src/main/java/us/codecraft/webmagic/
jruby
/RubyScriptProcessor.java
View file @
bc5c30de
package
us
.
codecraft
.
webmagic
.
processor
;
package
us
.
codecraft
.
webmagic
.
jruby
;
import
org.apache.commons.io.IOUtils
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.processor.PageProcessor
;
import
javax.script.ScriptContext
;
import
javax.script.ScriptEngine
;
...
...
@@ -15,7 +16,7 @@ import java.io.InputStream;
/**
* @author code4crafter@gmail.com
*/
public
class
RubyScriptProcessor
implements
PageProcessor
{
public
class
RubyScriptProcessor
implements
PageProcessor
{
private
ScriptEngine
rubyEngine
;
...
...
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java
0 → 100644
View file @
bc5c30de
package
us
.
codecraft
.
webmagic
.
scripts
;
/**
* @author code4crafter@gmail.com
*/
/**
 * Scripting languages a script processor can run, together with the
 * per-language names it needs.
 *
 * @author code4crafter@gmail.com
 */
public enum Language {

    JavaScript("javascript", "js/defines.js", ""),

    JRuby("jruby", "ruby/defines.rb", "");

    /** Name used to look the engine up via {@code ScriptEngineManager.getEngineByName}. */
    private final String engineName;

    /** Classpath resource whose contents are prepended to the user script. */
    private final String defineFile;

    // NOTE(review): empty for every language and not read by any visible code;
    // purpose to be confirmed.
    private final String gatherFile;

    Language(String engineName, String defineFile, String gatherFile) {
        this.engineName = engineName;
        this.defineFile = defineFile;
        this.gatherFile = gatherFile;
    }

    /** @return the script engine name for this language */
    public String getEngineName() {
        return engineName;
    }

    /** @return the classpath path of this language's defines file */
    public String getDefineFile() {
        return defineFile;
    }

    /** @return the gather file name; currently the empty string for all languages */
    public String getGatherFile() {
        return gatherFile;
    }
}
webmagic-scripts/src/main/java/us/codecraft/webmagic/
processor/Js
ScriptProcessor.java
→
webmagic-scripts/src/main/java/us/codecraft/webmagic/
scripts/
ScriptProcessor.java
View file @
bc5c30de
package
us
.
codecraft
.
webmagic
.
processor
;
package
us
.
codecraft
.
webmagic
.
scripts
;
import
org.apache.commons.io.IOUtils
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.
Spide
r
;
import
us.codecraft.webmagic.
processor.PageProcesso
r
;
import
javax.script.ScriptContext
;
import
javax.script.ScriptEngine
;
...
...
@@ -14,48 +14,51 @@ import java.io.InputStream;
/**
* @author code4crafter@gmail.com
* @since 0.4.1
*/
public
class
JsScriptProcessor
implements
PageProcessor
{
public
class
ScriptProcessor
implements
PageProcessor
{
private
ScriptEngine
rubyE
ngine
;
private
ScriptEngine
e
ngine
;
private
String
defines
;
private
String
script
;
public
JsScriptProcessor
(
String
filename
){
private
final
Language
language
;
private
Site
site
=
Site
.
me
();
public
ScriptProcessor
(
Language
language
,
String
script
)
{
if
(
language
==
null
||
script
==
null
)
{
throw
new
IllegalArgumentException
(
"language and script must not be null!"
);
}
this
.
language
=
language
;
ScriptEngineManager
manager
=
new
ScriptEngineManager
();
rubyEngine
=
manager
.
getEngineByName
(
"javascript"
);
InputStream
resourceAsStream
=
this
.
getClass
().
getClassLoader
().
getResourceAsStream
(
"js/defines.js"
);
engine
=
manager
.
getEngineByName
(
language
.
getEngineName
()
);
InputStream
resourceAsStream
=
this
.
getClass
().
getClassLoader
().
getResourceAsStream
(
language
.
getDefineFile
()
);
try
{
defines
=
IOUtils
.
toString
(
resourceAsStream
);
resourceAsStream
=
this
.
getClass
().
getClassLoader
().
getResourceAsStream
(
filename
);
script
=
IOUtils
.
toString
(
resourceAsStream
);
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
throw
new
IllegalArgumentException
(
e
);
}
this
.
script
=
script
;
}
@Override
public
void
process
(
Page
page
)
{
ScriptContext
context
=
rubyE
ngine
.
getContext
();
ScriptContext
context
=
e
ngine
.
getContext
();
context
.
setAttribute
(
"page"
,
page
,
ScriptContext
.
ENGINE_SCOPE
);
context
.
setAttribute
(
"config"
,
site
,
ScriptContext
.
ENGINE_SCOPE
);
try
{
rubyEngine
.
eval
(
defines
+
script
,
context
);
engine
.
eval
(
defines
+
script
,
context
);
}
catch
(
ScriptException
e
)
{
e
.
printStackTrace
();
}
}
@Override
public
Site
getSite
()
{
return
Site
.
me
()
;
return
site
;
}
public
static
void
main
(
String
[]
args
)
{
Spider
.
create
(
new
JsScriptProcessor
(
"js/oschina.js"
)).
addUrl
(
"http://my.oschina.net/flashsword/blog"
).
run
();
}
}
webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
0 → 100644
View file @
bc5c30de
package
us
.
codecraft
.
webmagic
.
scripts
;
import
org.apache.commons.io.IOUtils
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.io.InputStream
;
/**
* @author code4crafter@gmail.com
* @since 0.4.1
*/
/**
 * Fluent builder for {@link ScriptProcessor}.
 *
 * <p>The language defaults to {@link Language#JavaScript}; the script source
 * can be given directly, read from a file, or read from the classpath. The
 * last script-setting call wins.
 *
 * @author code4crafter@gmail.com
 * @since 0.4.1
 */
public class ScriptProcessorBuilder {

    private static final Language DEFAULT_LANGUAGE = Language.JavaScript;

    private Language language = DEFAULT_LANGUAGE;

    private String script;

    private ScriptProcessorBuilder() {
    }

    /** @return a new builder preset to the default language */
    public static ScriptProcessorBuilder custom() {
        return new ScriptProcessorBuilder();
    }

    /**
     * Selects the scripting language.
     *
     * @param language language the script is written in
     * @return this builder
     */
    public ScriptProcessorBuilder language(Language language) {
        this.language = language;
        return this;
    }

    /**
     * Loads the script source from a file on the file system.
     *
     * @param fileName path of the script file
     * @return this builder
     * @throws IllegalArgumentException if the file cannot be opened or read;
     *                                  the IOException is the cause
     */
    public ScriptProcessorBuilder scriptFromFile(String fileName) {
        InputStream in = null;
        try {
            in = new FileInputStream(fileName);
            this.script = IOUtils.toString(in);
        } catch (IOException e) {
            //wrap IOException because I prefer a runtime exception...
            throw new IllegalArgumentException(e);
        } finally {
            // Release the file handle on success and on failure alike.
            IOUtils.closeQuietly(in);
        }
        return this;
    }

    /**
     * Loads the script source from a classpath resource.
     *
     * @param fileName classpath-relative name of the script resource
     * @return this builder
     * @throws IllegalArgumentException if the resource does not exist or
     *                                  cannot be read
     */
    public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) {
        InputStream in = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; name the
            // resource instead of failing later with a NullPointerException.
            throw new IllegalArgumentException("classpath resource not found: " + fileName);
        }
        try {
            this.script = IOUtils.toString(in);
        } catch (IOException e) {
            //wrap IOException because I prefer a runtime exception...
            throw new IllegalArgumentException(e);
        } finally {
            IOUtils.closeQuietly(in);
        }
        return this;
    }

    /**
     * Sets the script source directly.
     *
     * @param script script source code
     * @return this builder
     */
    public ScriptProcessorBuilder script(String script) {
        this.script = script;
        return this;
    }

    /**
     * Creates the processor from the current language and script.
     *
     * @return a new {@link ScriptProcessor}
     */
    public ScriptProcessor build() {
        return new ScriptProcessor(language, script);
    }
}
webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
0 → 100644
View file @
bc5c30de
package
us
.
codecraft
.
webmagic
.
scripts
;
import
org.junit.Test
;
import
us.codecraft.webmagic.Spider
;
/**
* @author code4crafter@gmail.com
* @since 0.4.1
*/
/**
 * Runs one crawl of a fixed blog URL through {@link ScriptProcessor} for each
 * supported scripting language.
 *
 * @author code4crafter@gmail.com
 * @since 0.4.1
 */
public class ScriptProcessorTest {

    @Test
    public void testJavaScriptProcessor() {
        crawlBlogOnce(Language.JavaScript, "js/oschina.js");
    }

    @Test
    public void testRubyProcessor() {
        crawlBlogOnce(Language.JRuby, "ruby/oschina.rb");
    }

    /**
     * Builds a processor for the given language from a classpath script, sets
     * its sleep time to zero and runs a spider on the blog URL with URL
     * spawning switched off.
     */
    private static void crawlBlogOnce(Language scriptLanguage, String scriptResource) {
        ScriptProcessor processor = ScriptProcessorBuilder.custom()
                .language(scriptLanguage)
                .scriptFromClassPathFile(scriptResource)
                .build();
        processor.getSite().setSleepTime(0);
        Spider.create(processor)
                .addUrl("http://my.oschina.net/flashsword/blog")
                .setSpawnUrl(false)
                .run();
    }
}
webmagic-scripts/src/test/resouces/log4j.xml
0 → 100644
View file @
bc5c30de
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration
xmlns:log4j=
"http://jakarta.apache.org/log4j/"
>
<appender
name=
"stdout"
class=
"org.apache.log4j.ConsoleAppender"
>
<layout
class=
"org.apache.log4j.PatternLayout"
>
<param
name=
"ConversionPattern"
value=
"%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n"
/>
</layout>
</appender>
<logger
name=
"org.apache"
additivity=
"false"
>
<level
value=
"warn"
/>
<appender-ref
ref=
"stdout"
/>
</logger>
<root>
<level
value=
"debug"
/>
<appender-ref
ref=
"stdout"
/>
</root>
</log4j:configuration>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment