Commit 6b9d21fc authored by Yihua Huang

Merge pull request #188 from EdwardsBean/retry_time

add retry sleep time
parents 8ffc1a70 49786656
...@@ -39,6 +39,8 @@ public class Site { ...@@ -39,6 +39,8 @@ public class Site {
private int cycleRetryTimes = 0; private int cycleRetryTimes = 0;
private int retrySleepTime = 1000;
private int timeOut = 5000; private int timeOut = 5000;
private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>(); private static final Set<Integer> DEFAULT_STATUS_CODE_SET = new HashSet<Integer>();
...@@ -49,8 +51,8 @@ public class Site { ...@@ -49,8 +51,8 @@ public class Site {
private HttpHost httpProxy; private HttpHost httpProxy;
private ProxyPool httpProxyPool; private ProxyPool httpProxyPool;
private boolean useGzip = true; private boolean useGzip = true;
/** /**
...@@ -359,6 +361,20 @@ public class Site { ...@@ -359,6 +361,20 @@ public class Site {
return useGzip; return useGzip;
} }
/**
 * Get the retry sleep time configured for this site, 1000 by default. <br>
 * Spider.processRequest sleeps for this long when a download returns no page
 * and when a page needs a cycle retry.
 * NOTE(review): the unit is presumably milliseconds -- confirm against Spider's sleep helper.
 *
 * @return the current retry sleep time
 */
public int getRetrySleepTime() {
return retrySleepTime;
}
/**
 * Set the time to sleep when a download fails, 1000 by default. <br>
 * NOTE(review): the unit is presumably milliseconds, and the value is stored
 * without validation -- confirm how negative values are handled by the caller.
 *
 * @param retrySleepTime the sleep time to apply after a failed download
 * @return this
 */
public Site setRetrySleepTime(int retrySleepTime) {
this.retrySleepTime = retrySleepTime;
return this;
}
/** /**
* Whether use gzip. <br> * Whether use gzip. <br>
* Default is true, you can set it to false to disable gzip. * Default is true, you can set it to false to disable gzip.
...@@ -448,31 +464,31 @@ public class Site { ...@@ -448,31 +464,31 @@ public class Site {
* *
* @return this * @return this
*/ */
public Site setHttpProxyPool(List<String[]> httpProxyList) { public Site setHttpProxyPool(List<String[]> httpProxyList) {
this.httpProxyPool=new ProxyPool(httpProxyList); this.httpProxyPool=new ProxyPool(httpProxyList);
return this; return this;
} }
public Site enableHttpProxyPool() { public Site enableHttpProxyPool() {
this.httpProxyPool=new ProxyPool(); this.httpProxyPool=new ProxyPool();
return this; return this;
} }
public ProxyPool getHttpProxyPool() { public ProxyPool getHttpProxyPool() {
return httpProxyPool; return httpProxyPool;
} }
public HttpHost getHttpProxyFromPool() { public HttpHost getHttpProxyFromPool() {
return httpProxyPool.getProxy(); return httpProxyPool.getProxy();
} }
public void returnHttpProxyToPool(HttpHost proxy,int statusCode) { public void returnHttpProxyToPool(HttpHost proxy,int statusCode) {
httpProxyPool.returnProxy(proxy,statusCode); httpProxyPool.returnProxy(proxy,statusCode);
} }
public Site setProxyReuseInterval(int reuseInterval) { public Site setProxyReuseInterval(int reuseInterval) {
this.httpProxyPool.setReuseInterval(reuseInterval); this.httpProxyPool.setReuseInterval(reuseInterval);
return this; return this;
} }
} }
...@@ -407,14 +407,14 @@ public class Spider implements Runnable, Task { ...@@ -407,14 +407,14 @@ public class Spider implements Runnable, Task {
protected void processRequest(Request request) { protected void processRequest(Request request) {
Page page = downloader.download(request, this); Page page = downloader.download(request, this);
if (page == null) { if (page == null) {
sleep(site.getSleepTime()); sleep(site.getRetrySleepTime());
onError(request); onError(request);
return; return;
} }
// for cycle retry // for cycle retry
if (page.isNeedCycleRetry()) { if (page.isNeedCycleRetry()) {
extractAndAddRequests(page, true); extractAndAddRequests(page, true);
sleep(site.getSleepTime()); sleep(site.getRetrySleepTime());
return; return;
} }
pageProcessor.process(page); pageProcessor.process(page);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment