Commit 19474e47 authored by edwardsbean's avatar edwardsbean

add SimpleProxyPool and IProxyPool

parent 05a1f395
...@@ -4,7 +4,8 @@ import com.google.common.collect.HashBasedTable; ...@@ -4,7 +4,8 @@ import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table; import com.google.common.collect.Table;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import us.codecraft.webmagic.proxy.ProxyPool; import us.codecraft.webmagic.proxy.IProxyPool;
import us.codecraft.webmagic.proxy.SimpleProxyPool;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
import java.util.*; import java.util.*;
...@@ -51,7 +52,7 @@ public class Site { ...@@ -51,7 +52,7 @@ public class Site {
private HttpHost httpProxy; private HttpHost httpProxy;
private ProxyPool httpProxyPool; private IProxyPool httpProxyPool;
private boolean useGzip = true; private boolean useGzip = true;
...@@ -464,17 +465,17 @@ public class Site { ...@@ -464,17 +465,17 @@ public class Site {
* *
* @return this * @return this
*/ */
public Site setHttpProxyPool(List<String[]> httpProxyList) { public Site setHttpProxyPool(IProxyPool proxyPool) {
this.httpProxyPool=new ProxyPool(httpProxyList); this.httpProxyPool = proxyPool;
return this; return this;
} }
public Site enableHttpProxyPool() { public Site enableHttpProxyPool() {
this.httpProxyPool=new ProxyPool(); this.httpProxyPool=new SimpleProxyPool();
return this; return this;
} }
public ProxyPool getHttpProxyPool() { public IProxyPool getHttpProxyPool() {
return httpProxyPool; return httpProxyPool;
} }
...@@ -486,9 +487,4 @@ public class Site { ...@@ -486,9 +487,4 @@ public class Site {
httpProxyPool.returnProxy(proxy,statusCode); httpProxyPool.returnProxy(proxy,statusCode);
} }
public Site setProxyReuseInterval(int reuseInterval) {
this.httpProxyPool.setReuseInterval(reuseInterval);
return this;
}
} }
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
/**
 * Abstraction over a pool of HTTP proxies, so that different pool
 * implementations can be plugged in behind one type.
 *
 * Created by edwardsbean on 15-2-28.
 */
public interface IProxyPool {

    /**
     * Reports whether this pool is enabled.
     *
     * @return {@code true} if the pool is enabled
     */
    boolean isEnable();

    /**
     * Obtains a proxy host from the pool.
     *
     * @return the proxy host to use
     */
    HttpHost getProxy();

    /**
     * Hands a proxy host back to the pool together with a status code.
     *
     * @param host       the proxy host being returned
     * @param statusCode the status code reported for that proxy
     */
    void returnProxy(HttpHost host, int statusCode);
}
...@@ -22,7 +22,7 @@ import java.util.concurrent.DelayQueue; ...@@ -22,7 +22,7 @@ import java.util.concurrent.DelayQueue;
* @see Proxy * @see Proxy
* @since 0.5.1 * @since 0.5.1
*/ */
public class ProxyPool { public class ProxyPool implements IProxyPool{
private Logger logger = LoggerFactory.getLogger(getClass()); private Logger logger = LoggerFactory.getLogger(getClass());
......
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
/**
 * A simple {@link IProxyPool} that hands out proxies from a {@link DelayQueue}
 * and keeps every known proxy in a lookup map keyed by its resolved IP address.
 *
 * Created by edwardsbean on 15-2-28.
 */
public class SimpleProxyPool implements IProxyPool {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Proxies that can be handed out once their delay has expired. */
    private final BlockingQueue<Proxy> proxyQueue = new DelayQueue<Proxy>();

    /** Every proxy ever added, keyed by the textual IP address of its host. */
    private final Map<String, Proxy> allProxy = new ConcurrentHashMap<String, Proxy>();

    /** Set by {@link #addProxy}; volatile so other threads see the change. */
    private volatile boolean isEnable = false;

    private int reuseInterval = 1500;// ms
    private int reviveTime = 2 * 60 * 60 * 1000;// ms

    /**
     * Creates an empty pool. The pool stays disabled until
     * {@link #addProxy(String[][])} is called.
     */
    public SimpleProxyPool() {
        this(null);
    }

    /**
     * Creates a pool pre-filled with the given proxies.
     *
     * @param httpProxyList entries of the form {@code {host, port}}; may be
     *                      {@code null}, in which case nothing is added
     */
    public SimpleProxyPool(List<String[]> httpProxyList) {
        if (httpProxyList != null) {
            addProxy(httpProxyList.toArray(new String[httpProxyList.size()][]));
        }
    }

    /**
     * Adds proxies to the pool and marks the pool as enabled.
     * Entries that are malformed, cannot be resolved, or are already known
     * are skipped; the remaining entries are still processed.
     *
     * @param httpProxyList entries of the form {@code {host, port}}
     */
    public void addProxy(String[]... httpProxyList) {
        isEnable = true;
        for (String[] s : httpProxyList) {
            if (s == null || s.length < 2) {
                // A malformed entry must not abort the whole batch.
                logger.error("HttpHost init error: proxy entry must contain host and port");
                continue;
            }
            try {
                InetAddress address = InetAddress.getByName(s[0]);
                // Key by the resolved IP, because returnProxy() looks proxies up
                // by host.getAddress().getHostAddress(). Keying by the raw
                // configured string would never match when a hostname is given.
                String key = address.getHostAddress();
                if (allProxy.containsKey(key)) {
                    continue;
                }
                HttpHost item = new HttpHost(address, Integer.parseInt(s[1]));
                Proxy p = new Proxy(item, reuseInterval);
                proxyQueue.add(p);
                allProxy.put(key, p);
            } catch (NumberFormatException e) {
                logger.error("HttpHost init error:", e);
            } catch (UnknownHostException e) {
                logger.error("HttpHost init error:", e);
            }
        }
        logger.info("proxy pool size>>>>{}", allProxy.size());
    }

    /**
     * Records the outcome of a request made through {@code host} and puts the
     * proxy back into the queue, unless it has failed more than three times.
     * Hosts that are unknown to this pool are ignored.
     *
     * @param host       the proxy that was used
     * @param statusCode one of the {@code Proxy} status constants, or any other
     *                   code, which is recorded as a failure
     */
    @Override
    public void returnProxy(HttpHost host, int statusCode) {
        // NOTE(review): HttpHost.getAddress() is presumably null for hosts built
        // from a plain hostname - guard instead of failing with an NPE.
        if (host == null || host.getAddress() == null) {
            return;
        }
        Proxy p = allProxy.get(host.getAddress().getHostAddress());
        if (p == null) {
            return;
        }
        switch (statusCode) {
            case Proxy.SUCCESS:
                p.setFailedNum(0);
                p.setFailedErrorType(new ArrayList<Integer>());
                p.recordResponse();
                p.successNumIncrement(1);
                break;
            case Proxy.ERROR_403:
                // banned,try longer interval
                p.fail(Proxy.ERROR_403);
                break;
            case Proxy.ERROR_BANNED:
                p.fail(Proxy.ERROR_BANNED);
                logger.warn("this proxy is banned >>>> {}", p.getHttpHost());
                break;
            case Proxy.ERROR_404:
                // A 404 is not counted as a proxy failure.
                break;
            default:
                p.fail(statusCode);
                break;
        }
        if (p.getFailedNum() > 3) {
            // The proxy is simply not re-queued; it remains in allProxy.
            logger.error("remove proxy >>>> {}>>>>{} >>>> remain proxy >>>> {}",
                    host, p.getFailedType(), proxyQueue.size());
            return;
        }
        try {
            proxyQueue.put(p);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            logger.warn("proxyQueue return proxy error", e);
        }
    }

    /**
     * Takes the next available proxy from the queue, waiting until one
     * becomes available.
     *
     * @return the host of the next available proxy
     * @throws NoSuchElementException if the calling thread is interrupted
     *                                while waiting
     */
    @Override
    public HttpHost getProxy() {
        try {
            return proxyQueue.take().getHttpHost();
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            logger.error("get proxy error", e);
            throw new NoSuchElementException();
        }
    }

    /**
     * Reports whether {@link #addProxy(String[][])} has been called on this pool.
     *
     * @return {@code true} once proxies have been added
     */
    @Override
    public boolean isEnable() {
        return isEnable;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment