Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
cf62d707
Commit
cf62d707
authored
Nov 27, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#36 Spider does not exit when success
parent
a0131293
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
83 additions
and
9 deletions
+83
-9
Page.java
webmagic-core/src/main/java/us/codecraft/webmagic/Page.java
+2
-1
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+8
-2
SpiderTest.java
...-core/src/test/java/us/codecraft/webmagic/SpiderTest.java
+64
-0
MockGithubDownloader.java
...s/codecraft/webmagic/downloader/MockGithubDownloader.java
+5
-3
GithubRepoTest.java
...test/java/us/codecraft/webmagic/model/GithubRepoTest.java
+2
-2
GithubRepoProcessor.java
.../us/codecraft/webmagic/processor/GithubRepoProcessor.java
+2
-1
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Page.java
View file @
cf62d707
...
...
@@ -186,8 +186,9 @@ public class Page {
return
rawText
;
}
public
void
setRawText
(
String
rawText
)
{
public
Page
setRawText
(
String
rawText
)
{
this
.
rawText
=
rawText
;
return
this
;
}
@Override
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
cf62d707
...
...
@@ -98,6 +98,8 @@ public class Spider implements Runnable, Task {
private
Condition
newUrlCondition
=
newUrlLock
.
newCondition
();
private
final
AtomicInteger
threadAlive
=
new
AtomicInteger
(
0
);
/**
* create a spider with pageProcessor.
*
...
...
@@ -276,6 +278,7 @@ public class Spider implements Runnable, Task {
}
startRequests
.
clear
();
}
threadAlive
.
set
(
0
);
}
@Override
...
...
@@ -283,7 +286,6 @@ public class Spider implements Runnable, Task {
checkRunningStat
();
initComponent
();
logger
.
info
(
"Spider "
+
getUUID
()
+
" started!"
);
final
AtomicInteger
threadAlive
=
new
AtomicInteger
(
0
);
while
(!
Thread
.
currentThread
().
isInterrupted
()
&&
stat
.
get
()
==
STAT_RUNNING
)
{
Request
request
=
scheduler
.
poll
(
this
);
if
(
request
==
null
)
{
...
...
@@ -369,7 +371,7 @@ public class Spider implements Runnable, Task {
return
;
}
// for cycle retry
if
(
page
.
get
Html
()
==
null
)
{
if
(
page
.
get
RawText
()
==
null
)
{
extractAndAddRequests
(
page
);
sleep
(
site
.
getSleepTime
());
return
;
...
...
@@ -485,6 +487,10 @@ public class Spider implements Runnable, Task {
private
void
waitNewUrl
()
{
try
{
newUrlLock
.
lock
();
//double check
if
(
threadAlive
.
get
()
==
0
&&
exitWhenComplete
)
{
return
;
}
try
{
newUrlCondition
.
await
();
}
catch
(
InterruptedException
e
)
{
...
...
webmagic-core/src/test/java/us/codecraft/webmagic/SpiderTest.java
View file @
cf62d707
...
...
@@ -2,8 +2,14 @@ package us.codecraft.webmagic;
import
org.junit.Ignore
;
import
org.junit.Test
;
import
us.codecraft.webmagic.downloader.Downloader
;
import
us.codecraft.webmagic.pipeline.Pipeline
;
import
us.codecraft.webmagic.processor.PageProcessor
;
import
us.codecraft.webmagic.processor.SimplePageProcessor
;
import
us.codecraft.webmagic.scheduler.Scheduler
;
import
java.util.Random
;
import
java.util.concurrent.atomic.AtomicInteger
;
/**
* @author code4crafter@gmail.com
...
...
@@ -26,4 +32,62 @@ public class SpiderTest {
spider
.
start
();
Thread
.
sleep
(
10000
);
}
@Ignore
(
"long time"
)
@Test
public
void
testWaitAndNotify
()
throws
InterruptedException
{
for
(
int
i
=
0
;
i
<
10000
;
i
++)
{
System
.
out
.
println
(
"round"
+
i
);
testRound
();
}
}
private
void
testRound
()
{
Spider
spider
=
Spider
.
create
(
new
PageProcessor
()
{
private
AtomicInteger
count
=
new
AtomicInteger
();
@Override
public
void
process
(
Page
page
)
{
page
.
setSkip
(
true
);
}
@Override
public
Site
getSite
()
{
return
Site
.
me
().
setSleepTime
(
0
);
}
}).
setDownloader
(
new
Downloader
()
{
@Override
public
Page
download
(
Request
request
,
Task
task
)
{
return
new
Page
().
setRawText
(
""
);
}
@Override
public
void
setThread
(
int
threadNum
)
{
}
}).
setScheduler
(
new
Scheduler
()
{
private
AtomicInteger
count
=
new
AtomicInteger
();
private
Random
random
=
new
Random
();
@Override
public
void
push
(
Request
request
,
Task
task
)
{
}
@Override
public
synchronized
Request
poll
(
Task
task
)
{
if
(
count
.
incrementAndGet
()
>
1000
)
{
return
null
;
}
if
(
random
.
nextInt
(
100
)>
90
){
return
null
;
}
return
new
Request
(
"test"
);
}
}).
thread
(
10
);
spider
.
run
();
}
}
webmagic-extension/src/test/java/us/codecraft/webmagic/
Mock
Downloader.java
→
webmagic-extension/src/test/java/us/codecraft/webmagic/
downloader/MockGithub
Downloader.java
View file @
cf62d707
package
us
.
codecraft
.
webmagic
;
package
us
.
codecraft
.
webmagic
.
downloader
;
import
us.codecraft.webmagic.downloader.Downloader
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.selector.Html
;
import
us.codecraft.webmagic.selector.PlainText
;
/**
* @author code4crafter@gmail.com
*/
public
class
MockDownloader
implements
Downloader
{
public
class
Mock
Github
Downloader
implements
Downloader
{
private
String
html
=
"\n"
+
"\n"
+
...
...
webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
View file @
cf62d707
...
...
@@ -2,7 +2,7 @@ package us.codecraft.webmagic.model;
import
junit.framework.Assert
;
import
org.junit.Test
;
import
us.codecraft.webmagic.
Mock
Downloader
;
import
us.codecraft.webmagic.
downloader.MockGithub
Downloader
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.example.GithubRepo
;
...
...
@@ -22,6 +22,6 @@ public class GithubRepoTest {
Assert
.
assertEquals
(
86
,
o
.
getStar
());
Assert
.
assertEquals
(
70
,
o
.
getFork
());
}
},
GithubRepo
.
class
).
setDownloader
(
new
MockDownloader
()).
test
(
"https://github.com/code4craft/webmagic"
);
},
GithubRepo
.
class
).
setDownloader
(
new
Mock
Github
Downloader
()).
test
(
"https://github.com/code4craft/webmagic"
);
}
}
webmagic-extension/src/test/java/us/codecraft/webmagic/processor/GithubRepoProcessor.java
View file @
cf62d707
...
...
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.processor;
import
junit.framework.Assert
;
import
org.junit.Test
;
import
us.codecraft.webmagic.*
;
import
us.codecraft.webmagic.downloader.MockGithubDownloader
;
import
us.codecraft.webmagic.model.OOSpider
;
import
us.codecraft.webmagic.pipeline.Pipeline
;
...
...
@@ -29,7 +30,7 @@ public class GithubRepoProcessor implements PageProcessor {
Assert
.
assertEquals
(
"78"
,((
String
)
resultItems
.
get
(
"star"
)).
trim
());
Assert
.
assertEquals
(
"65"
,((
String
)
resultItems
.
get
(
"fork"
)).
trim
());
}
}).
setDownloader
(
new
MockDownloader
()).
test
(
"https://github.com/code4craft/webmagic"
);
}).
setDownloader
(
new
Mock
Github
Downloader
()).
test
(
"https://github.com/code4craft/webmagic"
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment