Commit 83c27ebb authored by Jon's avatar Jon Committed by yihua.huang

增加IP代理认证功能

parent ca072c55
......@@ -4,6 +4,7 @@ import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import org.apache.http.HttpHost;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyPool;
import us.codecraft.webmagic.utils.UrlUtils;
......@@ -474,6 +475,11 @@ public class Site {
return this;
}
/**
 * Replaces this site's HTTP proxy pool with a new one built from the given entries.
 *
 * @param httpProxyList proxy entries, handed unchanged to the {@code ProxyPool} constructor
 * @param isUseLastProxy flag handed unchanged to the {@code ProxyPool} constructor
 * @return this site, so that configuration calls can be chained
 */
public Site setHttpProxyPool(List<String[]> httpProxyList, boolean isUseLastProxy) {
    ProxyPool pool = new ProxyPool(httpProxyList, isUseLastProxy);
    this.httpProxyPool = pool;
    return this;
}
public Site enableHttpProxyPool() {
this.httpProxyPool=new ProxyPool();
return this;
......@@ -483,7 +489,7 @@ public class Site {
return httpProxyPool;
}
public HttpHost getHttpProxyFromPool() {
public Proxy getHttpProxyFromPool() {
return httpProxyPool.getProxy();
}
......
......@@ -24,6 +24,7 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.utils.UrlUtils;
......@@ -50,9 +51,9 @@ public class HttpClientDownloader extends AbstractDownloader {
private HttpClientGenerator httpClientGenerator = new HttpClientGenerator();
private CloseableHttpClient getHttpClient(Site site) {
private CloseableHttpClient getHttpClient(Site site, Proxy proxy) {
if (site == null) {
return httpClientGenerator.getClient(null);
return httpClientGenerator.getClient(null, proxy);
}
String domain = site.getDomain();
CloseableHttpClient httpClient = httpClients.get(domain);
......@@ -60,7 +61,7 @@ public class HttpClientDownloader extends AbstractDownloader {
synchronized (this) {
httpClient = httpClients.get(domain);
if (httpClient == null) {
httpClient = httpClientGenerator.getClient(site);
httpClient = httpClientGenerator.getClient(site, proxy);
httpClients.put(domain, httpClient);
}
}
......@@ -88,8 +89,17 @@ public class HttpClientDownloader extends AbstractDownloader {
CloseableHttpResponse httpResponse = null;
int statusCode=0;
try {
HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers);
httpResponse = getHttpClient(site).execute(httpUriRequest);
HttpHost proxyHost = null;
Proxy proxy = null; //TODO
if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
proxy = site.getHttpProxyFromPool();
proxyHost = proxy.getHttpHost();
} else if(site.getHttpProxy()!= null){
proxyHost = site.getHttpProxy();
}
HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers, proxyHost);// the proxy is now passed in here
httpResponse = getHttpClient(site, proxy).execute(httpUriRequest);// getHttpClient now also sets up proxy authentication
statusCode = httpResponse.getStatusLine().getStatusCode();
request.putExtra(Request.STATUS_CODE, statusCode);
if (statusAccept(acceptStatCode, statusCode)) {
......@@ -129,7 +139,7 @@ public class HttpClientDownloader extends AbstractDownloader {
return acceptStatCode.contains(statusCode);
}
protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers) {
protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers,HttpHost proxy) {
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
if (headers != null) {
for (Map.Entry<String, String> headerEntry : headers.entrySet()) {
......@@ -141,14 +151,9 @@ public class HttpClientDownloader extends AbstractDownloader {
.setSocketTimeout(site.getTimeOut())
.setConnectTimeout(site.getTimeOut())
.setCookieSpec(CookieSpecs.BEST_MATCH);
if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
HttpHost host = site.getHttpProxyFromPool();
requestConfigBuilder.setProxy(host);
request.putExtra(Request.PROXY, host);
}else if(site.getHttpProxy()!= null){
HttpHost host = site.getHttpProxy();
requestConfigBuilder.setProxy(host);
request.putExtra(Request.PROXY, host);
if (proxy !=null) {
requestConfigBuilder.setProxy(proxy);
request.putExtra(Request.PROXY, proxy);
}
requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build();
......
package us.codecraft.webmagic.downloader;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.config.SocketConfig;
......@@ -15,6 +19,7 @@ import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.protocol.HttpContext;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.proxy.Proxy;
import java.io.IOException;
import java.util.Map;
......@@ -41,12 +46,24 @@ public class HttpClientGenerator {
return this;
}
public CloseableHttpClient getClient(Site site) {
return generateClient(site);
public CloseableHttpClient getClient(Site site, Proxy proxy) {
return generateClient(site, proxy);
}
private CloseableHttpClient generateClient(Site site) {
HttpClientBuilder httpClientBuilder = HttpClients.custom().setConnectionManager(connectionManager);
private CloseableHttpClient generateClient(Site site, Proxy proxy) {
CredentialsProvider credsProvider = null;
HttpClientBuilder httpClientBuilder = HttpClients.custom();
if(proxy!=null && StringUtils.isNotBlank(proxy.getUser()) && StringUtils.isNotBlank(proxy.getPassword()))
{
credsProvider= new BasicCredentialsProvider();
credsProvider.setCredentials(
new AuthScope(proxy.getHttpHost().getAddress().getHostAddress(), proxy.getHttpHost().getPort()),
new UsernamePasswordCredentials(proxy.getUser(), proxy.getPassword()));
httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
}
httpClientBuilder.setConnectionManager(connectionManager);
if (site != null && site.getUserAgent() != null) {
httpClientBuilder.setUserAgent(site.getUserAgent());
} else {
......@@ -61,7 +78,6 @@ public class HttpClientGenerator {
if (!request.containsHeader("Accept-Encoding")) {
request.addHeader("Accept-Encoding", "gzip");
}
}
});
}
......
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHost;
/**
* >>>> Proxy lifecycle
......@@ -64,6 +64,9 @@ public class Proxy implements Delayed, Serializable {
public static final int SUCCESS = 200;
private final HttpHost httpHost;
private String user;
private String password;
private int reuseTimeInterval = 1500;// ms
private Long canReuseTime = 0L;
......@@ -76,13 +79,17 @@ public class Proxy implements Delayed, Serializable {
private List<Integer> failedErrorType = new ArrayList<Integer>();
Proxy(HttpHost httpHost) {
Proxy(HttpHost httpHost, String user, String password) {
this.httpHost = httpHost;
this.user = user;
this.password = password;
this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseTimeInterval, TimeUnit.MILLISECONDS);
}
Proxy(HttpHost httpHost, int reuseInterval) {
Proxy(HttpHost httpHost, int reuseInterval, String user, String password) {
this.httpHost = httpHost;
this.user = user;
this.password = password;
this.canReuseTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(reuseInterval, TimeUnit.MILLISECONDS);
}
......@@ -170,6 +177,17 @@ public class Proxy implements Delayed, Serializable {
return re;
}
/**
 * Returns the user name held by this proxy.
 *
 * @return the value of the {@code user} field, exactly as it was stored
 */
public String getUser() {
    return this.user;
}
/**
 * Returns the password held by this proxy.
 *
 * @return the value of the {@code password} field, exactly as it was stored
 */
public String getPassword() {
    return this.password;
}
public void borrowNumIncrement(int increment) {
this.borrowNum += increment;
......
......@@ -156,14 +156,14 @@ public class ProxyPool {
isEnable = true;
for (String[] s : httpProxyList) {
try {
if (allProxy.containsKey(s[0])) {
if (allProxy.containsKey(s[2])) {
continue;
}
HttpHost item = new HttpHost(InetAddress.getByName(s[0]), Integer.valueOf(s[1]));
HttpHost item = new HttpHost(InetAddress.getByName(s[2]), Integer.valueOf(s[3]));
if (!validateWhenInit || ProxyUtils.validateProxy(item)) {
Proxy p = new Proxy(item, reuseInterval);
Proxy p = new Proxy(item, reuseInterval, s[0], s[1]);
proxyQueue.add(p);
allProxy.put(s[0], p);
allProxy.put(s[2], p);
}
} catch (NumberFormatException e) {
logger.error("HttpHost init error:", e);
......@@ -174,7 +174,7 @@ public class ProxyPool {
logger.info("proxy pool size>>>>" + allProxy.size());
}
public HttpHost getProxy() {
public Proxy getProxy() {
Proxy proxy = null;
try {
Long time = System.currentTimeMillis();
......@@ -192,7 +192,7 @@ public class ProxyPool {
if (proxy == null) {
throw new NoSuchElementException();
}
return proxy.getHttpHost();
return proxy;
}
public void returnProxy(HttpHost host, int statusCode) {
......
......@@ -90,12 +90,12 @@ public class HttpClientDownloaderTest {
private String getCharsetByUrl(String url) {
HttpClientDownloader downloader = new HttpClientDownloader();
Site site = Site.me();
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site);
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null);
// encoding in http header Content-Type
Request requestGBK = new Request(url);
CloseableHttpResponse httpResponse = null;
try {
httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null));
httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null,null));
} catch (IOException e) {
e.printStackTrace();
}
......
......@@ -22,9 +22,9 @@ public class ProxyTest {
public static void before() {
// String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0",
// "0.0.0.4:0" };
String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0", "0.0.0.4:0" };
String[] source = { "::0.0.0.1:0", "::0.0.0.2:0", "::0.0.0.3:0", "::0.0.0.4:0" };
for (String line : source) {
httpProxyList.add(new String[] { line.split(":")[0], line.split(":")[1] });
httpProxyList.add(new String[] {line.split(":")[0], line.split(":")[1], line.split(":")[2], line.split(":")[3] });
}
}
......@@ -37,7 +37,8 @@ public class ProxyTest {
for (int i = 0; i < 2; i++) {
List<Fetch> fetchList = new ArrayList<Fetch>();
while (proxyPool.getIdleNum() != 0) {
HttpHost httphost = proxyPool.getProxy();
Proxy proxy = proxyPool.getProxy();
HttpHost httphost = proxy.getHttpHost();
// httphostList.add(httphost);
System.out.println(httphost.getHostName() + ":" + httphost.getPort());
Fetch tmp = new Fetch(httphost);
......@@ -69,4 +70,5 @@ public class ProxyTest {
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment