Commit 1fc8e104 authored by yihua.huang

add cycle retry

parent 891d845e
...@@ -310,6 +310,12 @@ public class Spider implements Runnable, Task { ...@@ -310,6 +310,12 @@ public class Spider implements Runnable, Task {
sleep(site.getSleepTime()); sleep(site.getSleepTime());
return; return;
} }
//for cycle retry
if (page.getHtml()==null){
addRequest(page);
sleep(site.getSleepTime());
return;
}
pageProcessor.process(page); pageProcessor.process(page);
addRequest(page); addRequest(page);
if (!page.getResultItems().isSkip()) { if (!page.getResultItems().isSkip()) {
......
...@@ -4,6 +4,7 @@ import us.codecraft.webmagic.Site; ...@@ -4,6 +4,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.RedisScheduler;
import java.util.List; import java.util.List;
...@@ -24,10 +25,10 @@ public class F58PageProcesser implements PageProcessor { ...@@ -24,10 +25,10 @@ public class F58PageProcesser implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("sh.58.com").addStartUrl("http://sh.58.com/"); //To change body of implemented methods use File | Settings | File Templates. return Site.me().setDomain("sh.58.com").addStartUrl("http://sh1.51a8.com/").setCycleRetryTimes(2); //To change body of implemented methods use File | Settings | File Templates.
} }
public static void main(String[] args) { public static void main(String[] args) {
Spider.create(new F58PageProcesser()).run(); Spider.create(new F58PageProcesser()).setScheduler(new RedisScheduler("localhost")).run();
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment