Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
57106145
Commit
57106145
authored
Feb 27, 2014
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#58 add CYCLE_TRIED_TIMES support to QueueScheduler and PriorityScheduler
parent
a79ae6a9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
35 deletions
+44
-35
LocalDuplicatedRemovedScheduler.java
...t/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
+33
-0
PriorityScheduler.java
...va/us/codecraft/webmagic/scheduler/PriorityScheduler.java
+8
-18
QueueScheduler.java
.../java/us/codecraft/webmagic/scheduler/QueueScheduler.java
+3
-17
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
0 → 100644
View file @
57106145
package
us
.
codecraft
.
webmagic
.
scheduler
;
import
com.google.common.collect.Sets
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
java.util.Set
;
import
java.util.concurrent.ConcurrentHashMap
;
/**
 * Base {@link Scheduler} that removes duplicated urls using a local, in-memory set.
 * <p>
 * {@link #push(Request, Task)} performs the duplicate check and forwards every request
 * that passes it to {@link #pushWhenNoDuplicate(Request, Task)}, which subclasses
 * implement to store the request in their own queue.
 *
 * @author code4crafter@gmail.com
 * @since 0.5.0
 */
public abstract class LocalDuplicatedRemovedScheduler implements Scheduler {

    protected Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Urls that have already been accepted by {@link #push(Request, Task)}.
     * Backed by a {@link ConcurrentHashMap}, so concurrent {@code add} calls are safe
     * without extra locking in this class.
     */
    private final Set<String> urls = Sets.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

    /**
     * Logs the request url and forwards the request to
     * {@link #pushWhenNoDuplicate(Request, Task)} unless it is a duplicate.
     * <p>
     * A request is forwarded when either:
     * <ul>
     * <li>it carries a non-null {@code Request.CYCLE_TRIED_TIMES} extra (presumably a
     * request re-queued for retry; confirm against {@code Request}). In that case the
     * url set is neither consulted nor updated; or</li>
     * <li>its url has not been pushed before, in which case the url is recorded.</li>
     * </ul>
     *
     * @param request the request to schedule
     * @param task    the task the request belongs to; passed through unchanged
     */
    @Override
    public void push(Request request, Task task) {
        // Parameterized logging: the message string is only assembled when debug is enabled.
        logger.debug("push to queue {}", request.getUrl());
        // Short-circuit: urls.add() is evaluated only when the extra is absent.
        if (request.getExtra(Request.CYCLE_TRIED_TIMES) != null || urls.add(request.getUrl())) {
            pushWhenNoDuplicate(request, task);
        }
    }

    /**
     * Stores a request that has passed the duplicate check in {@link #push(Request, Task)}.
     *
     * @param request the request to enqueue
     * @param task    the task the request belongs to
     */
    protected abstract void pushWhenNoDuplicate(Request request, Task task);
}
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
View file @
57106145
package
us
.
codecraft
.
webmagic
.
scheduler
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.utils.NumberUtils
;
import
java.util.Comparator
;
import
java.util.HashSet
;
import
java.util.Set
;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.LinkedBlockingQueue
;
import
java.util.concurrent.PriorityBlockingQueue
;
...
...
@@ -21,12 +17,10 @@ import java.util.concurrent.PriorityBlockingQueue;
* @since 0.2.1
*/
@ThreadSafe
public
class
PriorityScheduler
implements
Scheduler
{
public
class
PriorityScheduler
extends
LocalDuplicatedRemoved
Scheduler
{
public
static
final
int
INITIAL_CAPACITY
=
5
;
private
Logger
logger
=
LoggerFactory
.
getLogger
(
getClass
());
private
BlockingQueue
<
Request
>
noPriorityQueue
=
new
LinkedBlockingQueue
<
Request
>();
private
PriorityBlockingQueue
<
Request
>
priorityQueuePlus
=
new
PriorityBlockingQueue
<
Request
>(
INITIAL_CAPACITY
,
new
Comparator
<
Request
>()
{
...
...
@@ -43,19 +37,15 @@ public class PriorityScheduler implements Scheduler {
}
});
private
Set
<
String
>
urls
=
new
HashSet
<
String
>();
@Override
public
synchronized
void
push
(
Request
request
,
Task
task
)
{
logger
.
debug
(
"push to queue "
+
request
.
getUrl
());
if
(
urls
.
add
(
request
.
getUrl
()))
{
if
(
request
.
getPriority
()
==
0
)
{
noPriorityQueue
.
add
(
request
);
}
else
if
(
request
.
getPriority
()
>
0
)
{
priorityQueuePlus
.
put
(
request
);
}
else
{
priorityQueueMinus
.
put
(
request
);
}
public
void
pushWhenNoDuplicate
(
Request
request
,
Task
task
)
{
if
(
request
.
getPriority
()
==
0
)
{
noPriorityQueue
.
add
(
request
);
}
else
if
(
request
.
getPriority
()
>
0
)
{
priorityQueuePlus
.
put
(
request
);
}
else
{
priorityQueueMinus
.
put
(
request
);
}
}
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java
View file @
57106145
package
us
.
codecraft
.
webmagic
.
scheduler
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
java.util.HashSet
;
import
java.util.Set
;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.LinkedBlockingQueue
;
...
...
@@ -20,23 +16,13 @@ import java.util.concurrent.LinkedBlockingQueue;
* @since 0.1.0
*/
@ThreadSafe
public
class
QueueScheduler
implements
Scheduler
{
private
Logger
logger
=
LoggerFactory
.
getLogger
(
getClass
());
public
class
QueueScheduler
extends
LocalDuplicatedRemovedScheduler
{
private
BlockingQueue
<
Request
>
queue
=
new
LinkedBlockingQueue
<
Request
>();
private
Set
<
String
>
urls
=
new
HashSet
<
String
>();
@Override
public
synchronized
void
push
(
Request
request
,
Task
task
)
{
if
(
logger
.
isDebugEnabled
())
{
logger
.
debug
(
"push to queue "
+
request
.
getUrl
());
}
if
(
urls
.
add
(
request
.
getUrl
()))
{
queue
.
add
(
request
);
}
public
void
pushWhenNoDuplicate
(
Request
request
,
Task
task
)
{
queue
.
add
(
request
);
}
@Override
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment