Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
b1f023ea
Commit
b1f023ea
authored
Jun 19, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix spell error=.=
parent
7bed01c9
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
8 additions
and
8 deletions
+8
-8
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+1
-1
FileCacheQueueScheduler.java
...codecraft/webmagic/schedular/FileCacheQueueScheduler.java
+2
-2
SpiderTest.java
...mples/src/test/java/us/codecraft/webmagic/SpiderTest.java
+2
-2
DiandianProcessorTest.java
...s/codecraft/webmagic/processor/DiandianProcessorTest.java
+1
-1
DiaoyuwengProcessorTest.java
...codecraft/webmagic/processor/DiaoyuwengProcessorTest.java
+1
-1
SinablogProcessorTest.java
...s/codecraft/webmagic/processor/SinablogProcessorTest.java
+1
-1
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
b1f023ea
...
@@ -66,7 +66,7 @@ public class Spider implements Runnable, Task {
...
@@ -66,7 +66,7 @@ public class Spider implements Runnable, Task {
return
this
;
return
this
;
}
}
public
Spider
schedul
a
r
(
Scheduler
scheduler
)
{
public
Spider
schedul
e
r
(
Scheduler
scheduler
)
{
this
.
scheduler
=
scheduler
;
this
.
scheduler
=
scheduler
;
return
this
;
return
this
;
}
}
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueScheduler.java
View file @
b1f023ea
...
@@ -63,7 +63,7 @@ public class FileCacheQueueScheduler implements Scheduler {
...
@@ -63,7 +63,7 @@ public class FileCacheQueueScheduler implements Scheduler {
initWriter
();
initWriter
();
initFlushThread
();
initFlushThread
();
inited
.
set
(
true
);
inited
.
set
(
true
);
logger
.
info
(
"init cache schedul
a
r success"
);
logger
.
info
(
"init cache schedul
e
r success"
);
}
}
private
void
initFlushThread
()
{
private
void
initFlushThread
()
{
...
@@ -80,7 +80,7 @@ public class FileCacheQueueScheduler implements Scheduler {
...
@@ -80,7 +80,7 @@ public class FileCacheQueueScheduler implements Scheduler {
fileUrlWriter
=
new
PrintWriter
(
new
FileWriter
(
getFileName
(
fileUrlAllName
),
true
));
fileUrlWriter
=
new
PrintWriter
(
new
FileWriter
(
getFileName
(
fileUrlAllName
),
true
));
fileCursorWriter
=
new
PrintWriter
(
new
FileWriter
(
getFileName
(
fileCursor
),
false
));
fileCursorWriter
=
new
PrintWriter
(
new
FileWriter
(
getFileName
(
fileCursor
),
false
));
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
throw
new
RuntimeException
(
"init cache schedul
a
r error"
,
e
);
throw
new
RuntimeException
(
"init cache schedul
e
r error"
,
e
);
}
}
}
}
...
...
webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java
View file @
b1f023ea
...
@@ -26,12 +26,12 @@ public class SpiderTest {
...
@@ -26,12 +26,12 @@ public class SpiderTest {
@Test
@Test
public
void
testGlobalSpider
(){
public
void
testGlobalSpider
(){
// PageProcessor pageProcessor = new MeicanProcessor();
// PageProcessor pageProcessor = new MeicanProcessor();
// Spider.me().pipeline(new FilePipeline()).schedul
a
r(new FileCacheQueueScheduler(pageProcessor.getSite(),"/data/temp/webmagic/cache/")).
// Spider.me().pipeline(new FilePipeline()).schedul
e
r(new FileCacheQueueScheduler(pageProcessor.getSite(),"/data/temp/webmagic/cache/")).
// processor(pageProcessor).run();
// processor(pageProcessor).run();
SimplePageProcessor
pageProcessor2
=
new
SimplePageProcessor
(
"http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space"
,
"http://www.diaoyuweng.com/thread-*-1-1.html"
);
SimplePageProcessor
pageProcessor2
=
new
SimplePageProcessor
(
"http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space"
,
"http://www.diaoyuweng.com/thread-*-1-1.html"
);
System
.
out
.
println
(
pageProcessor2
.
getSite
().
getEncoding
());
System
.
out
.
println
(
pageProcessor2
.
getSite
().
getEncoding
());
pageProcessor2
.
getSite
().
setSleepTime
(
500
);
pageProcessor2
.
getSite
().
setSleepTime
(
500
);
Spider
.
create
(
pageProcessor2
).
pipeline
(
new
FilePipeline
()).
schedul
a
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
Spider
.
create
(
pageProcessor2
).
pipeline
(
new
FilePipeline
()).
schedul
e
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
run
();
run
();
...
...
webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java
View file @
b1f023ea
...
@@ -30,7 +30,7 @@ public class DiandianProcessorTest {
...
@@ -30,7 +30,7 @@ public class DiandianProcessorTest {
//ConsolePipeline输出结果到控制台
//ConsolePipeline输出结果到控制台
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
//Spider.run()执行
//Spider.run()执行
Spider
.
create
(
diaoyuwengProcessor
).
pipeline
(
new
ConsolePipeline
()).
pipeline
(
pipeline
).
schedul
a
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
Spider
.
create
(
diaoyuwengProcessor
).
pipeline
(
new
ConsolePipeline
()).
pipeline
(
pipeline
).
schedul
e
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
run
();
run
();
}
}
}
}
webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java
View file @
b1f023ea
...
@@ -22,7 +22,7 @@ public class DiaoyuwengProcessorTest {
...
@@ -22,7 +22,7 @@ public class DiaoyuwengProcessorTest {
public
void
test
()
throws
IOException
{
public
void
test
()
throws
IOException
{
DiaoyuwengProcessor
diaoyuwengProcessor
=
new
DiaoyuwengProcessor
();
DiaoyuwengProcessor
diaoyuwengProcessor
=
new
DiaoyuwengProcessor
();
FreemarkerPipeline
pipeline
=
new
FreemarkerPipeline
(
"wordpress.ftl"
);
FreemarkerPipeline
pipeline
=
new
FreemarkerPipeline
(
"wordpress.ftl"
);
Spider
.
create
(
diaoyuwengProcessor
).
pipeline
(
new
FilePipeline
()).
pipeline
(
pipeline
).
schedul
a
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
Spider
.
create
(
diaoyuwengProcessor
).
pipeline
(
new
FilePipeline
()).
pipeline
(
pipeline
).
schedul
e
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
run
();
run
();
}
}
}
}
webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java
View file @
b1f023ea
...
@@ -30,7 +30,7 @@ public class SinablogProcessorTest {
...
@@ -30,7 +30,7 @@ public class SinablogProcessorTest {
//ConsolePipeline输出结果到控制台
//ConsolePipeline输出结果到控制台
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
//FileCacheQueueSchedular保存url,支持断点续传,临时文件输出到/data/temp/webmagic/cache目录
//Spider.run()执行
//Spider.run()执行
Spider
.
create
(
sinaBlogProcesser
).
pipeline
(
new
FilePipeline
()).
pipeline
(
pipeline
).
schedul
a
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
Spider
.
create
(
sinaBlogProcesser
).
pipeline
(
new
FilePipeline
()).
pipeline
(
pipeline
).
schedul
e
r
(
new
FileCacheQueueScheduler
(
"/data/temp/webmagic/cache/"
)).
run
();
run
();
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment