Commit 5215a492 authored by yihua.huang

remove duplicate check for POST request #484

parent 45bf2b6f
...@@ -6,6 +6,7 @@ import us.codecraft.webmagic.Request; ...@@ -6,6 +6,7 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.component.DuplicateRemover; import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover; import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
import us.codecraft.webmagic.utils.HttpConstant;
/** /**
* Remove duplicate urls and only push urls which are not duplicate.<br><br> * Remove duplicate urls and only push urls which are not duplicate.<br><br>
...@@ -31,7 +32,7 @@ public abstract class DuplicateRemovedScheduler implements Scheduler { ...@@ -31,7 +32,7 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
@Override @Override
public void push(Request request, Task task) { public void push(Request request, Task task) {
logger.trace("get a candidate url {}", request.getUrl()); logger.trace("get a candidate url {}", request.getUrl());
if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request)) { if (!duplicatedRemover.isDuplicate(request, task) || shouldReserved(request) || noNeedToRemoveDuplicate(request)) {
logger.debug("push to queue {}", request.getUrl()); logger.debug("push to queue {}", request.getUrl());
pushWhenNoDuplicate(request, task); pushWhenNoDuplicate(request, task);
} }
...@@ -41,6 +42,10 @@ public abstract class DuplicateRemovedScheduler implements Scheduler { ...@@ -41,6 +42,10 @@ public abstract class DuplicateRemovedScheduler implements Scheduler {
return request.getExtra(Request.CYCLE_TRIED_TIMES) != null; return request.getExtra(Request.CYCLE_TRIED_TIMES) != null;
} }
/**
 * Tells whether the given request is exempt from duplicate filtering.
 * <p>
 * A request is exempt when its HTTP method matches
 * {@code HttpConstant.Method.POST}, ignoring case. {@code push} enqueues
 * such a request even if the duplicate remover reports it as a duplicate.
 *
 * @param request the candidate request being pushed
 * @return {@code true} if the request's method is POST (case-insensitive);
 *         {@code false} otherwise, including when the method is {@code null}
 */
protected boolean noNeedToRemoveDuplicate(Request request) {
    // Keep the constant as the receiver so a null method yields false, not an NPE.
    final String httpMethod = request.getMethod();
    return HttpConstant.Method.POST.equalsIgnoreCase(httpMethod);
}
protected void pushWhenNoDuplicate(Request request, Task task) { protected void pushWhenNoDuplicate(Request request, Task task) {
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment