Commit 1fc8e104 authored by yihua.huang

add cycle retry

parent 891d845e
......@@ -310,6 +310,12 @@ public class Spider implements Runnable, Task {
sleep(site.getSleepTime());
return;
}
//for cycle retry
if (page.getHtml()==null){
addRequest(page);
sleep(site.getSleepTime());
return;
}
pageProcessor.process(page);
addRequest(page);
if (!page.getResultItems().isSkip()) {
......
......@@ -4,6 +4,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.RedisScheduler;
import java.util.List;
......@@ -24,10 +25,10 @@ public class F58PageProcesser implements PageProcessor {
/**
 * Builds the Site configuration for this page processor via the Site.me() builder chain.
 *
 * NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so the
 * two return statements below are presumably the pre-change and post-change versions of a
 * single line rather than two live statements. Confirm against the repository.
 *
 * @return the Site produced by the builder chain
 */
@Override
public Site getSite() {
// Presumably the removed line: domain "sh.58.com", start URL "http://sh.58.com/".
return Site.me().setDomain("sh.58.com").addStartUrl("http://sh.58.com/"); //To change body of implemented methods use File | Settings | File Templates.
// Presumably the added line: same domain, start URL changed to "http://sh1.51a8.com/" and
// setCycleRetryTimes(2) appended to the chain.
return Site.me().setDomain("sh.58.com").addStartUrl("http://sh1.51a8.com/").setCycleRetryTimes(2); //To change body of implemented methods use File | Settings | File Templates.
}
/**
 * Entry point: creates a Spider around a new F58PageProcesser and runs it.
 *
 * NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so the
 * two statements below are presumably the before and after versions of one changed line.
 * Confirm against the repository.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// Presumably the removed line: runs the spider without setting a scheduler explicitly.
Spider.create(new F58PageProcesser()).run();
// Presumably the added line: same run, but with a RedisScheduler constructed with "localhost".
Spider.create(new F58PageProcesser()).setScheduler(new RedisScheduler("localhost")).run();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment