Commit 83c27ebb authored by Jon, committed by yihua.huang

增加IP代理认证功能

parent ca072c55
...@@ -4,6 +4,7 @@ import com.google.common.collect.HashBasedTable; ...@@ -4,6 +4,7 @@ import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table; import com.google.common.collect.Table;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyPool; import us.codecraft.webmagic.proxy.ProxyPool;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
...@@ -474,6 +475,11 @@ public class Site { ...@@ -474,6 +475,11 @@ public class Site {
return this; return this;
} }
/**
 * Replaces this site's HTTP proxy pool with a new {@link ProxyPool} built from the given entries.
 *
 * @param httpProxyList  proxy entries handed unchanged to the {@code ProxyPool} constructor;
 *                       NOTE(review): the pool's init loop appears to read each entry as
 *                       {user, password, host, port} - confirm against ProxyPool
 * @param isUseLastProxy passed straight through to the {@code ProxyPool} constructor
 * @return this site, so that calls can be chained
 */
public Site setHttpProxyPool(List<String[]> httpProxyList, boolean isUseLastProxy) {
    final ProxyPool pool = new ProxyPool(httpProxyList, isUseLastProxy);
    this.httpProxyPool = pool;
    return this;
}
public Site enableHttpProxyPool() { public Site enableHttpProxyPool() {
this.httpProxyPool=new ProxyPool(); this.httpProxyPool=new ProxyPool();
return this; return this;
...@@ -483,7 +489,7 @@ public class Site { ...@@ -483,7 +489,7 @@ public class Site {
return httpProxyPool; return httpProxyPool;
} }
public HttpHost getHttpProxyFromPool() { public Proxy getHttpProxyFromPool() {
return httpProxyPool.getProxy(); return httpProxyPool.getProxy();
} }
......
...@@ -24,6 +24,7 @@ import us.codecraft.webmagic.Page; ...@@ -24,6 +24,7 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.HttpConstant; import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
...@@ -50,9 +51,9 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -50,9 +51,9 @@ public class HttpClientDownloader extends AbstractDownloader {
private HttpClientGenerator httpClientGenerator = new HttpClientGenerator(); private HttpClientGenerator httpClientGenerator = new HttpClientGenerator();
private CloseableHttpClient getHttpClient(Site site) { private CloseableHttpClient getHttpClient(Site site, Proxy proxy) {
if (site == null) { if (site == null) {
return httpClientGenerator.getClient(null); return httpClientGenerator.getClient(null, proxy);
} }
String domain = site.getDomain(); String domain = site.getDomain();
CloseableHttpClient httpClient = httpClients.get(domain); CloseableHttpClient httpClient = httpClients.get(domain);
...@@ -60,7 +61,7 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -60,7 +61,7 @@ public class HttpClientDownloader extends AbstractDownloader {
synchronized (this) { synchronized (this) {
httpClient = httpClients.get(domain); httpClient = httpClients.get(domain);
if (httpClient == null) { if (httpClient == null) {
httpClient = httpClientGenerator.getClient(site); httpClient = httpClientGenerator.getClient(site, proxy);
httpClients.put(domain, httpClient); httpClients.put(domain, httpClient);
} }
} }
...@@ -88,8 +89,17 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -88,8 +89,17 @@ public class HttpClientDownloader extends AbstractDownloader {
CloseableHttpResponse httpResponse = null; CloseableHttpResponse httpResponse = null;
int statusCode=0; int statusCode=0;
try { try {
HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers); HttpHost proxyHost = null;
httpResponse = getHttpClient(site).execute(httpUriRequest); Proxy proxy = null; //TODO
if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
proxy = site.getHttpProxyFromPool();
proxyHost = proxy.getHttpHost();
} else if(site.getHttpProxy()!= null){
proxyHost = site.getHttpProxy();
}
HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers, proxyHost);// the proxy host is applied to the request here
httpResponse = getHttpClient(site, proxy).execute(httpUriRequest);// getHttpClient now adds proxy authentication
statusCode = httpResponse.getStatusLine().getStatusCode(); statusCode = httpResponse.getStatusLine().getStatusCode();
request.putExtra(Request.STATUS_CODE, statusCode); request.putExtra(Request.STATUS_CODE, statusCode);
if (statusAccept(acceptStatCode, statusCode)) { if (statusAccept(acceptStatCode, statusCode)) {
...@@ -129,7 +139,7 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -129,7 +139,7 @@ public class HttpClientDownloader extends AbstractDownloader {
return acceptStatCode.contains(statusCode); return acceptStatCode.contains(statusCode);
} }
protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers) { protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers,HttpHost proxy) {
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl()); RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
if (headers != null) { if (headers != null) {
for (Map.Entry<String, String> headerEntry : headers.entrySet()) { for (Map.Entry<String, String> headerEntry : headers.entrySet()) {
...@@ -141,14 +151,9 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -141,14 +151,9 @@ public class HttpClientDownloader extends AbstractDownloader {
.setSocketTimeout(site.getTimeOut()) .setSocketTimeout(site.getTimeOut())
.setConnectTimeout(site.getTimeOut()) .setConnectTimeout(site.getTimeOut())
.setCookieSpec(CookieSpecs.BEST_MATCH); .setCookieSpec(CookieSpecs.BEST_MATCH);
if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) { if (proxy !=null) {
HttpHost host = site.getHttpProxyFromPool(); requestConfigBuilder.setProxy(proxy);
requestConfigBuilder.setProxy(host); request.putExtra(Request.PROXY, proxy);
request.putExtra(Request.PROXY, host);
}else if(site.getHttpProxy()!= null){
HttpHost host = site.getHttpProxy();
requestConfigBuilder.setProxy(host);
request.putExtra(Request.PROXY, host);
} }
requestBuilder.setConfig(requestConfigBuilder.build()); requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build(); return requestBuilder.build();
......
package us.codecraft.webmagic.downloader; package us.codecraft.webmagic.downloader;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpException; import org.apache.http.HttpException;
import org.apache.http.HttpRequest; import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor; import org.apache.http.HttpRequestInterceptor;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore; import org.apache.http.client.CookieStore;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.config.Registry; import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder; import org.apache.http.config.RegistryBuilder;
import org.apache.http.config.SocketConfig; import org.apache.http.config.SocketConfig;
...@@ -15,6 +19,7 @@ import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; ...@@ -15,6 +19,7 @@ import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.cookie.BasicClientCookie; import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.protocol.HttpContext; import org.apache.http.protocol.HttpContext;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.proxy.Proxy;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
...@@ -41,12 +46,24 @@ public class HttpClientGenerator { ...@@ -41,12 +46,24 @@ public class HttpClientGenerator {
return this; return this;
} }
public CloseableHttpClient getClient(Site site) { public CloseableHttpClient getClient(Site site, Proxy proxy) {
return generateClient(site); return generateClient(site, proxy);
} }
private CloseableHttpClient generateClient(Site site) { private CloseableHttpClient generateClient(Site site, Proxy proxy) {
HttpClientBuilder httpClientBuilder = HttpClients.custom().setConnectionManager(connectionManager); CredentialsProvider credsProvider = null;
HttpClientBuilder httpClientBuilder = HttpClients.custom();
if(proxy!=null && StringUtils.isNotBlank(proxy.getUser()) && StringUtils.isNotBlank(proxy.getPassword()))
{
credsProvider= new BasicCredentialsProvider();
credsProvider.setCredentials(
new AuthScope(proxy.getHttpHost().getAddress().getHostAddress(), proxy.getHttpHost().getPort()),
new UsernamePasswordCredentials(proxy.getUser(), proxy.getPassword()));
httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
}
httpClientBuilder.setConnectionManager(connectionManager);
if (site != null && site.getUserAgent() != null) { if (site != null && site.getUserAgent() != null) {
httpClientBuilder.setUserAgent(site.getUserAgent()); httpClientBuilder.setUserAgent(site.getUserAgent());
} else { } else {
...@@ -61,7 +78,6 @@ public class HttpClientGenerator { ...@@ -61,7 +78,6 @@ public class HttpClientGenerator {
if (!request.containsHeader("Accept-Encoding")) { if (!request.containsHeader("Accept-Encoding")) {
request.addHeader("Accept-Encoding", "gzip"); request.addHeader("Accept-Encoding", "gzip");
} }
} }
}); });
} }
......
package us.codecraft.webmagic.proxy; package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.Delayed; import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHost;
/** /**
* >>>> Proxy lifecycle * >>>> Proxy lifecycle
...@@ -64,6 +64,9 @@ public class Proxy implements Delayed, Serializable { ...@@ -64,6 +64,9 @@ public class Proxy implements Delayed, Serializable {
public static final int SUCCESS = 200; public static final int SUCCESS = 200;
private final HttpHost httpHost; private final HttpHost httpHost;
private String user;
private String password;
private int reuseTimeInterval = 1500;// ms private int reuseTimeInterval = 1500;// ms
private Long canReuseTime = 0L; private Long canReuseTime = 0L;
...@@ -76,13 +79,17 @@ public class Proxy implements Delayed, Serializable { ...@@ -76,13 +79,17 @@ public class Proxy implements Delayed, Serializable {
private List<Integer> failedErrorType = new ArrayList<Integer>(); private List<Integer> failedErrorType = new ArrayList<Integer>();
Proxy(HttpHost httpHost) { Proxy(HttpHost httpHost, String user, String password) {
this.httpHost = httpHost; this.httpHost = httpHost;
this.user = user;
this.password = password;
this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseTimeInterval, TimeUnit.MILLISECONDS); this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseTimeInterval, TimeUnit.MILLISECONDS);
} }
Proxy(HttpHost httpHost, int reuseInterval) { Proxy(HttpHost httpHost, int reuseInterval, String user, String password) {
this.httpHost = httpHost; this.httpHost = httpHost;
this.user = user;
this.password = password;
this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseInterval, TimeUnit.MILLISECONDS); this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseInterval, TimeUnit.MILLISECONDS);
} }
...@@ -171,6 +178,17 @@ public class Proxy implements Delayed, Serializable { ...@@ -171,6 +178,17 @@ public class Proxy implements Delayed, Serializable {
} }
/**
 * Returns the user name held by this proxy.
 *
 * @return the value of the {@code user} field exactly as stored; no validation is applied here
 */
public String getUser() {
    return this.user;
}
/**
 * Returns the password held by this proxy.
 *
 * @return the value of the {@code password} field exactly as stored; no validation is applied here
 */
public String getPassword() {
    return this.password;
}
public void borrowNumIncrement(int increment) { public void borrowNumIncrement(int increment) {
this.borrowNum += increment; this.borrowNum += increment;
} }
......
...@@ -156,14 +156,14 @@ public class ProxyPool { ...@@ -156,14 +156,14 @@ public class ProxyPool {
isEnable = true; isEnable = true;
for (String[] s : httpProxyList) { for (String[] s : httpProxyList) {
try { try {
if (allProxy.containsKey(s[0])) { if (allProxy.containsKey(s[2])) {
continue; continue;
} }
HttpHost item = new HttpHost(InetAddress.getByName(s[0]), Integer.valueOf(s[1])); HttpHost item = new HttpHost(InetAddress.getByName(s[2]), Integer.valueOf(s[3]));
if (!validateWhenInit || ProxyUtils.validateProxy(item)) { if (!validateWhenInit || ProxyUtils.validateProxy(item)) {
Proxy p = new Proxy(item, reuseInterval); Proxy p = new Proxy(item, reuseInterval, s[0], s[1]);
proxyQueue.add(p); proxyQueue.add(p);
allProxy.put(s[0], p); allProxy.put(s[2], p);
} }
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
logger.error("HttpHost init error:", e); logger.error("HttpHost init error:", e);
...@@ -174,7 +174,7 @@ public class ProxyPool { ...@@ -174,7 +174,7 @@ public class ProxyPool {
logger.info("proxy pool size>>>>" + allProxy.size()); logger.info("proxy pool size>>>>" + allProxy.size());
} }
public HttpHost getProxy() { public Proxy getProxy() {
Proxy proxy = null; Proxy proxy = null;
try { try {
Long time = System.currentTimeMillis(); Long time = System.currentTimeMillis();
...@@ -192,7 +192,7 @@ public class ProxyPool { ...@@ -192,7 +192,7 @@ public class ProxyPool {
if (proxy == null) { if (proxy == null) {
throw new NoSuchElementException(); throw new NoSuchElementException();
} }
return proxy.getHttpHost(); return proxy;
} }
public void returnProxy(HttpHost host, int statusCode) { public void returnProxy(HttpHost host, int statusCode) {
......
...@@ -90,12 +90,12 @@ public class HttpClientDownloaderTest { ...@@ -90,12 +90,12 @@ public class HttpClientDownloaderTest {
private String getCharsetByUrl(String url) { private String getCharsetByUrl(String url) {
HttpClientDownloader downloader = new HttpClientDownloader(); HttpClientDownloader downloader = new HttpClientDownloader();
Site site = Site.me(); Site site = Site.me();
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site); CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null);
// encoding in http header Content-Type // encoding in http header Content-Type
Request requestGBK = new Request(url); Request requestGBK = new Request(url);
CloseableHttpResponse httpResponse = null; CloseableHttpResponse httpResponse = null;
try { try {
httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null)); httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null,null));
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
......
...@@ -22,9 +22,9 @@ public class ProxyTest { ...@@ -22,9 +22,9 @@ public class ProxyTest {
public static void before() { public static void before() {
// String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0", // String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0",
// "0.0.0.4:0" }; // "0.0.0.4:0" };
String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0", "0.0.0.4:0" }; String[] source = { "::0.0.0.1:0", "::0.0.0.2:0", "::0.0.0.3:0", "::0.0.0.4:0" };
for (String line : source) { for (String line : source) {
httpProxyList.add(new String[] { line.split(":")[0], line.split(":")[1] }); httpProxyList.add(new String[] {line.split(":")[0], line.split(":")[1], line.split(":")[2], line.split(":")[3] });
} }
} }
...@@ -37,7 +37,8 @@ public class ProxyTest { ...@@ -37,7 +37,8 @@ public class ProxyTest {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
List<Fetch> fetchList = new ArrayList<Fetch>(); List<Fetch> fetchList = new ArrayList<Fetch>();
while (proxyPool.getIdleNum() != 0) { while (proxyPool.getIdleNum() != 0) {
HttpHost httphost = proxyPool.getProxy(); Proxy proxy = proxyPool.getProxy();
HttpHost httphost = proxy.getHttpHost();
// httphostList.add(httphost); // httphostList.add(httphost);
System.out.println(httphost.getHostName() + ":" + httphost.getPort()); System.out.println(httphost.getHostName() + ":" + httphost.getPort());
Fetch tmp = new Fetch(httphost); Fetch tmp = new Fetch(httphost);
...@@ -69,4 +70,5 @@ public class ProxyTest { ...@@ -69,4 +70,5 @@ public class ProxyTest {
} }
} }
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment