Commit 1d86f7c0 authored by yihua.huang

compile passed in httpclientDownloader

parent b71f3795
......@@ -18,7 +18,6 @@ public class Request implements Serializable {
private static final long serialVersionUID = 2062192774891352043L;
public static final String CYCLE_TRIED_TIMES = "_cycle_tried_times";
public static final String STATUS_CODE = "statusCode";
public static final String PROXY = "proxy";
private String url;
......
......@@ -419,8 +419,6 @@ public class Spider implements Runnable, Task {
pipeline.process(page.getResultItems(), this);
}
}
//for proxy status management
request.putExtra(Request.STATUS_CODE, page.getStatusCode());
sleep(site.getSleepTime());
}
......
package us.codecraft.webmagic.downloader;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.annotation.ThreadSafe;
import org.apache.http.auth.AuthState;
......@@ -23,13 +22,11 @@ import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils;
import us.codecraft.webmagic.utils.WMCollections;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
......@@ -80,28 +77,22 @@ public class HttpClientDownloader extends AbstractDownloader {
CloseableHttpResponse httpResponse = null;
int statusCode = 0;
Site site = task.getSite();
try {
Proxy proxy = null;
if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
proxy = site.getHttpProxyFromPool();
} else if (site != null && site.getHttpProxy() != null){
proxy = site.getHttpProxy();
request.putExtra(Request.PROXY, site.getHttpProxy());
}
Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext();
if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
proxy = site.getHttpProxyFromPool();
request.putExtra(Request.PROXY, proxy);
HttpContext httpContext = new BasicHttpContext();
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request,site);
AuthState authState = new AuthState();
authState.update(new BasicScheme(), new UsernamePasswordCredentials("userName", "password"));
authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState);
CloseableHttpClient httpClient = getHttpClient(site, proxy);
}
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site);
CloseableHttpClient httpClient = getHttpClient(site);
try {
httpResponse = httpClient.execute(httpUriRequest, httpContext);
statusCode = httpResponse.getStatusLine().getStatusCode();
request.putExtra(Request.STATUS_CODE, statusCode);
if (statusAccept(acceptStatCode, statusCode)) {
Page page = handleResponse(request, charset, httpResponse, task);
if (site.getAcceptStatCode().contains(statusCode)) {
Page page = handleResponse(request, site.getCharset(), httpResponse, task);
onSuccess(request);
return page;
} else {
......@@ -120,10 +111,8 @@ public class HttpClientDownloader extends AbstractDownloader {
//ensure the connection is released back to pool
EntityUtils.consumeQuietly(httpResponse.getEntity());
}
request.putExtra(Request.STATUS_CODE, statusCode);
if (site != null && site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request
.getExtra(Request.STATUS_CODE));
if (proxy != null) {
site.getHttpProxyPool().returnProxy(proxy, statusCode);
}
}
}
......@@ -133,10 +122,6 @@ public class HttpClientDownloader extends AbstractDownloader {
httpClientGenerator.setPoolSize(thread);
}
/**
 * Tells whether a status code is one of the accepted status codes.
 *
 * @param acceptStatCode set of status codes regarded as acceptable
 * @param statusCode status code to look up in the set
 * @return {@code true} if {@code acceptStatCode} contains {@code statusCode}
 */
protected boolean statusAccept(Set<Integer> acceptStatCode, int statusCode) {
return acceptStatCode.contains(statusCode);
}
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
String content = getContent(charset, httpResponse);
Page page = new Page();
......
......@@ -10,7 +10,6 @@ import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.message.BasicNameValuePair;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.utils.HttpConstant;
import java.nio.charset.Charset;
......@@ -26,7 +25,7 @@ import java.util.Map;
*/
public class HttpUriRequestConverter {
public HttpUriRequest convert(Request request, Site site, Proxy proxy) {
public HttpUriRequest convert(Request request, Site site) {
return null;
}
......
......@@ -7,12 +7,12 @@ package us.codecraft.webmagic.proxy;
public class Proxy {
private ProxyHost proxyHost;
private String user;
private String username;
private String password;
public Proxy(ProxyHost proxyHost, String user, String password) {
public Proxy(ProxyHost proxyHost, String username, String password) {
this.proxyHost = proxyHost;
this.user = user;
this.username = username;
this.password = password;
}
......@@ -28,12 +28,12 @@ public class Proxy {
this.proxyHost = proxyHost;
}
public String getUser() {
return user;
public String getUsername() {
return username;
}
public void setUser(String user) {
this.user = user;
public void setUsername(String username) {
this.username = username;
}
public String getPassword() {
......
package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost;
/**
* Created by edwardsbean on 15-2-28.
*/
public interface ProxyPool {
void returnProxy(HttpHost host, int statusCode);
void returnProxy(Proxy proxy, int statusCode);
Proxy getProxy();
......
......@@ -7,8 +7,6 @@ import us.codecraft.webmagic.utils.FilePersistentBase;
import us.codecraft.webmagic.utils.ProxyUtils;
import java.io.*;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.BlockingQueue;
......@@ -156,7 +154,7 @@ public class TimerReuseProxyPool implements ProxyPool {
isEnable = true;
for (Proxy proxy : httpProxyList) {
if (!validateWhenInit || ProxyUtils.validateProxy(proxy.getProxyHost())) {
TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUser(), proxy.getPassword(), reuseInterval);
TimerReuseProxy p = new TimerReuseProxy(proxy.getProxyHost(), proxy.getUsername(), proxy.getPassword(), reuseInterval);
proxyQueue.add(p);
allProxy.put(p.getProxyHost().getHost(), p);
}
......@@ -185,8 +183,8 @@ public class TimerReuseProxyPool implements ProxyPool {
return proxy;
}
public void returnProxy(HttpHost host, int statusCode) {
TimerReuseProxy p = allProxy.get(host.getAddress().getHostAddress());
public void returnProxy(Proxy proxy, int statusCode) {
TimerReuseProxy p = allProxy.get(proxy.getProxyHost());
if (p == null) {
return;
}
......@@ -202,13 +200,13 @@ public class TimerReuseProxyPool implements ProxyPool {
// banned,try longer interval
p.fail(TimerReuseProxy.ERROR_403);
p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
logger.info(host + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
break;
case TimerReuseProxy.ERROR_BANNED:
p.fail(TimerReuseProxy.ERROR_BANNED);
p.setReuseTimeInterval(10 * 60 * 1000 * p.getFailedNum());
logger.warn("this proxy is banned >>>> " + p.getHttpHost());
logger.info(host + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
logger.info(proxy + " >>>> reuseTimeInterval is >>>> " + p.getReuseTimeInterval() / 1000.0);
break;
case TimerReuseProxy.ERROR_404:
// p.fail(Proxy.ERROR_404);
......@@ -220,13 +218,13 @@ public class TimerReuseProxyPool implements ProxyPool {
}
if (p.getFailedNum() > 20) {
p.setReuseTimeInterval(reviveTime);
logger.error("remove proxy >>>> " + host + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
return;
}
if (p.getFailedNum() > 0 && p.getFailedNum() % 5 == 0) {
if (!ProxyUtils.validateProxy(host)) {
if (!ProxyUtils.validateProxy(proxy)) {
p.setReuseTimeInterval(reviveTime);
logger.error("remove proxy >>>> " + host + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
logger.error("remove proxy >>>> " + proxy + ">>>>" + p.getFailedType() + " >>>> remain proxy >>>> " + proxyQueue.size());
return;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment