Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
05eb7831
Commit
05eb7831
authored
Apr 25, 2014
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactor and comments #110
parent
375e64e8
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
11 deletions
+47
-11
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+32
-8
CountableThreadPool.java
...decraft/webmagic/selector/thread/CountableThreadPool.java
+9
-3
ThreadUtils.java
...rc/main/java/us/codecraft/webmagic/utils/ThreadUtils.java
+6
-0
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
05eb7831
...
...
@@ -13,7 +13,7 @@ import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
import
us.codecraft.webmagic.processor.PageProcessor
;
import
us.codecraft.webmagic.scheduler.QueueScheduler
;
import
us.codecraft.webmagic.scheduler.Scheduler
;
import
us.codecraft.webmagic.selector.thread.ThreadPool
;
import
us.codecraft.webmagic.selector.thread.
Countable
ThreadPool
;
import
us.codecraft.webmagic.utils.UrlUtils
;
import
java.io.Closeable
;
...
...
@@ -74,7 +74,9 @@ public class Spider implements Runnable, Task {
protected
Logger
logger
=
LoggerFactory
.
getLogger
(
getClass
());
protected
ThreadPool
threadPool
;
protected
CountableThreadPool
threadPool
;
protected
ExecutorService
executorService
;
protected
int
threadNum
=
1
;
...
...
@@ -279,7 +281,11 @@ public class Spider implements Runnable, Task {
}
downloader
.
setThread
(
threadNum
);
if
(
threadPool
==
null
||
threadPool
.
isShutdown
())
{
threadPool
=
new
ThreadPool
(
threadNum
);
if
(
executorService
!=
null
&&
!
executorService
.
isShutdown
())
{
threadPool
=
new
CountableThreadPool
(
threadNum
,
executorService
);
}
else
{
threadPool
=
new
CountableThreadPool
(
threadNum
);
}
}
if
(
startRequests
!=
null
)
{
for
(
Request
request
:
startRequests
)
{
...
...
@@ -330,7 +336,7 @@ public class Spider implements Runnable, Task {
}
protected
void
onError
(
Request
request
)
{
if
(
CollectionUtils
.
isNotEmpty
(
spiderListeners
)){
if
(
CollectionUtils
.
isNotEmpty
(
spiderListeners
))
{
for
(
SpiderListener
spiderListener
:
spiderListeners
)
{
spiderListener
.
onError
(
request
);
}
...
...
@@ -338,7 +344,7 @@ public class Spider implements Runnable, Task {
}
protected
void
onSuccess
(
Request
request
)
{
if
(
CollectionUtils
.
isNotEmpty
(
spiderListeners
)){
if
(
CollectionUtils
.
isNotEmpty
(
spiderListeners
))
{
for
(
SpiderListener
spiderListener
:
spiderListeners
)
{
spiderListener
.
onSuccess
(
request
);
}
...
...
@@ -521,8 +527,7 @@ public class Spider implements Runnable, Task {
newUrlCondition
.
await
();
}
catch
(
InterruptedException
e
)
{
logger
.
warn
(
"waitNewUrl - interrupted, error {}"
,
e
);
}
finally
{
}
finally
{
newUrlLock
.
unlock
();
}
}
...
...
@@ -563,6 +568,21 @@ public class Spider implements Runnable, Task {
return
this
;
}
/**
 * Sets the number of worker threads together with the executor that should run them.
 *
 * @param executorService executor to remember on this spider; the pool-creation code in this
 *                        class hands it to {@code CountableThreadPool} when it is non-null and
 *                        not shut down, and otherwise lets the pool create its own executor
 * @param threadNum       number of worker threads, must be greater than zero
 * @return this
 * @throws IllegalArgumentException if {@code threadNum} is zero or negative
 */
public Spider thread(ExecutorService executorService, int threadNum) {
    checkIfRunning();
    // Validate before touching any field so that a rejected call leaves the spider unchanged.
    if (threadNum <= 0) {
        // Message text is kept as-is for compatibility, although a value of 1 is accepted.
        throw new IllegalArgumentException("threadNum should be more than one!");
    }
    this.threadNum = threadNum;
    // The executor argument used to be dropped; store it so it is actually used.
    this.executorService = executorService;
    return this;
}
/**
 * Reports the current value of the {@code exitWhenComplete} flag.
 *
 * @return the value of {@code exitWhenComplete}
 */
public boolean isExitWhenComplete() {
    return this.exitWhenComplete;
}
...
...
@@ -637,6 +657,9 @@ public class Spider implements Runnable, Task {
* @since 0.4.1
*/
public int getThreadAlive() {
    // Without a pool there is nothing to ask, so report zero; otherwise delegate to the pool.
    return threadPool == null ? 0 : threadPool.getThreadAlive();
}
...
...
@@ -667,7 +690,8 @@ public class Spider implements Runnable, Task {
}
/**
 * Replaces the executor used by this spider.
 *
 * @param executorService executor to store on this spider and, if a thread pool already
 *                        exists, to pass on to that pool
 * @return this
 */
public Spider setExecutorService(ExecutorService executorService) {
    // Check the running state first so that a rejected call changes nothing.
    checkIfRunning();
    this.executorService = executorService;
    // The pool may not exist yet (it is null-checked elsewhere in this class), so guard
    // the hand-over instead of dereferencing it unconditionally.
    if (this.threadPool != null) {
        this.threadPool.setExecutorService(executorService);
    }
    return this;
}
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/thread/ThreadPool.java
→
webmagic-core/src/main/java/us/codecraft/webmagic/selector/thread/
Countable
ThreadPool.java
View file @
05eb7831
...
...
@@ -7,10 +7,16 @@ import java.util.concurrent.locks.Condition;
import
java.util.concurrent.locks.ReentrantLock
;
/**
* Thread pool for workers.<br></br>
* Uses {@link java.util.concurrent.ExecutorService} as the inner implementation. <br></br>
* New feature: <br></br>
* 1. Blocks when the thread pool is full, to avoid polling many URLs without processing them. <br></br>
* 2. Count of thread alive for monitor.
*
* @author code4crafer@gmail.com
* @since 0.5.0
*/
public
class
ThreadPool
{
public
class
Countable
ThreadPool
{
private
int
threadNum
;
...
...
@@ -20,12 +26,12 @@ public class ThreadPool {
private
Condition
condition
=
reentrantLock
.
newCondition
();
public
ThreadPool
(
int
threadNum
)
{
public
Countable
ThreadPool
(
int
threadNum
)
{
this
.
threadNum
=
threadNum
;
this
.
executorService
=
Executors
.
newFixedThreadPool
(
threadNum
);
}
public
ThreadPool
(
int
threadNum
,
ExecutorService
executorService
)
{
public
Countable
ThreadPool
(
int
threadNum
,
ExecutorService
executorService
)
{
this
.
threadNum
=
threadNum
;
this
.
executorService
=
executorService
;
}
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/utils/ThreadUtils.java
View file @
05eb7831
...
...
@@ -13,6 +13,12 @@ import java.util.concurrent.TimeUnit;
*/
public
class
ThreadUtils
{
/**
* @deprecated
* @param threadSize
* @return
* @see us.codecraft.webmagic.selector.thread.CountableThreadPool
*/
public
static
ExecutorService
newFixedThreadPool
(
int
threadSize
)
{
if
(
threadSize
<=
0
)
{
throw
new
IllegalArgumentException
(
"ThreadSize must be greater than 0!"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment