Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
c2d6d495
Commit
c2d6d495
authored
Nov 27, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#41 add getThreadAlive(),getStatus,getPageCount() to spider
parent
cf62d707
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
61 additions
and
5 deletions
+61
-5
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+61
-5
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
c2d6d495
...
...
@@ -6,9 +6,9 @@ import org.apache.log4j.Logger;
import
us.codecraft.webmagic.downloader.Downloader
;
import
us.codecraft.webmagic.downloader.HttpClientDownloader
;
import
us.codecraft.webmagic.pipeline.CollectorPipeline
;
import
us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline
;
import
us.codecraft.webmagic.pipeline.ConsolePipeline
;
import
us.codecraft.webmagic.pipeline.Pipeline
;
import
us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline
;
import
us.codecraft.webmagic.processor.PageProcessor
;
import
us.codecraft.webmagic.scheduler.QueueScheduler
;
import
us.codecraft.webmagic.scheduler.Scheduler
;
...
...
@@ -18,12 +18,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
import
java.io.Closeable
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.List
;
import
java.util.UUID
;
import
java.util.*
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.atomic.AtomicInteger
;
import
java.util.concurrent.atomic.AtomicLong
;
import
java.util.concurrent.locks.Condition
;
import
java.util.concurrent.locks.ReentrantLock
;
...
...
@@ -100,6 +98,8 @@ public class Spider implements Runnable, Task {
// Counter of running worker threads. It is decremented in the finally block
// of the download task and read through getThreadAlive().
private
final
AtomicInteger
threadAlive
=
new
AtomicInteger
(
0
);
// Counter of processed pages, read through getPageCount(). It is incremented
// in the same finally block as the threadAlive decrement, so it advances even
// when the download logged an error.
private
final
AtomicLong
pageCount
=
new
AtomicLong
(
0
);
/**
* create a spider with pageProcessor.
*
...
...
@@ -306,6 +306,7 @@ public class Spider implements Runnable, Task {
logger
.
error
(
"download "
+
requestFinal
+
" error"
,
e
);
}
finally
{
threadAlive
.
decrementAndGet
();
pageCount
.
incrementAndGet
();
signalNewUrl
();
}
}
...
...
@@ -566,6 +567,61 @@ public class Spider implements Runnable, Task {
return
spawnUrl
;
}
/**
 * Returns how many pages this spider has counted so far.
 *
 * @return current value of the page counter
 * @since 0.4.1
 */
public long getPageCount() {
    final long downloaded = pageCount.get();
    return downloaded;
}
/**
 * Returns the spider's running status.
 * <p>
 * The raw state code held in {@code stat} is translated to a {@link Status}
 * constant through {@link Status#fromValue(int)}.
 *
 * @return running status
 * @see Status
 * @since 0.4.1
 */
public Status getStatus() {
    final int rawState = stat.get();
    return Status.fromValue(rawState);
}
/**
 * Running status of a spider, each constant bound to a fixed integer code.
 */
public enum Status {

    /** Status bound to code {@code 0}; also the fallback for unknown codes. */
    Init(0),

    /** Status bound to code {@code 1}. */
    Running(1),

    /** Status bound to code {@code 2}. */
    Stopped(2);

    /** Integer code of this constant; immutable once the constant is created. */
    private final int value;

    Status(int value) {
        this.value = value;
    }

    /**
     * Returns the integer code bound to this constant.
     *
     * @return the integer code
     */
    int getValue() {
        return value;
    }

    /**
     * Looks up the constant bound to the given integer code.
     *
     * @param value integer code to look up
     * @return the matching constant, or {@link #Init} when no constant
     *         carries that code
     */
    public static Status fromValue(int value) {
        for (Status status : Status.values()) {
            if (status.getValue() == value) {
                return status;
            }
        }
        // Unknown codes deliberately fall back to the default instead of failing.
        return Init;
    }
}
/**
 * Returns the number of threads that are currently running.
 *
 * @return current value of the running-thread counter
 * @since 0.4.1
 */
public int getThreadAlive() {
    final int alive = threadAlive.get();
    return alive;
}
/**
* Whether to add extracted urls for download.<br>
* Add urls to download when it is true, and just download seed urls when it is false. <br>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment