Commit 1cfbd13a authored by yihua.huang's avatar yihua.huang

refactor in HttpClientDownloader

parent 83ada974
package us.codecraft.webmagic.downloader; package us.codecraft.webmagic.downloader;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse; import org.apache.http.HttpResponse;
import org.apache.http.annotation.ThreadSafe; import org.apache.http.annotation.ThreadSafe;
import org.apache.http.auth.AuthState;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -30,16 +12,15 @@ import us.codecraft.webmagic.Page; ...@@ -30,16 +12,15 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyProvider; import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.CharsetUtils;
import us.codecraft.webmagic.utils.HttpClientUtils; import us.codecraft.webmagic.utils.HttpClientUtils;
import us.codecraft.webmagic.utils.HttpConstant;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.*; import java.util.HashMap;
import java.util.Map;
/** /**
...@@ -96,33 +77,12 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -96,33 +77,12 @@ public class HttpClientDownloader extends AbstractDownloader {
} }
logger.debug("downloading page {}", request.getUrl()); logger.debug("downloading page {}", request.getUrl());
CloseableHttpResponse httpResponse = null; CloseableHttpResponse httpResponse = null;
int statusCode = 0;
Site site = task.getSite(); Site site = task.getSite();
Proxy proxy = null;
HttpClientContext httpContext = new HttpClientContext();
if (proxyProvider != null) {
proxy = proxyProvider.getProxy(task);
AuthState authState = new AuthState();
authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState);
}
CloseableHttpClient httpClient = getHttpClient(site); CloseableHttpClient httpClient = getHttpClient(site);
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy); HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, site, proxyProvider != null ? proxyProvider.getProxy(task) : null);
if (request.getCookies() != null && CollectionUtils.isNotEmpty(request.getCookies())) {
CookieStore cookieStore = new BasicCookieStore();
for (Cookie c : request.getCookies()) {
cookieStore.addCookie(c);
}
httpContext.setCookieStore(cookieStore);
}
if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) {
for (Header h : request.getHeaders()) {
httpUriRequest.setHeader(h);
}
}
try { try {
httpResponse = httpClient.execute(httpUriRequest, httpContext); httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext());
statusCode = httpResponse.getStatusLine().getStatusCode(); int statusCode = httpResponse.getStatusLine().getStatusCode();
if (site.getAcceptStatCode().contains(statusCode)) { if (site.getAcceptStatCode().contains(statusCode)) {
Page page = handleResponse(request, site.getCharset(), httpResponse, task); Page page = handleResponse(request, site.getCharset(), httpResponse, task);
onSuccess(request); onSuccess(request);
......
package us.codecraft.webmagic.downloader;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
/**
 * Mutable holder pairing the {@link HttpUriRequest} to execute with the
 * {@link HttpClientContext} it should be executed under, so both can be
 * returned from a single conversion call.
 *
 * <p>Both properties are {@code null} until their setter has been called.
 *
 * @author code4crafter@gmail.com
 * Date: 17/4/8
 * Time: 19:43
 */
public class HttpClientRequestContext {

    /** The request to hand to the HTTP client. */
    private HttpUriRequest httpUriRequest;

    /** The execution context to pass alongside the request. */
    private HttpClientContext httpClientContext;

    // ---- httpUriRequest ----

    /**
     * @return the stored request, or {@code null} if none has been set
     */
    public HttpUriRequest getHttpUriRequest() {
        return this.httpUriRequest;
    }

    /**
     * @param httpUriRequest the request to store
     */
    public void setHttpUriRequest(HttpUriRequest httpUriRequest) {
        this.httpUriRequest = httpUriRequest;
    }

    // ---- httpClientContext ----

    /**
     * @return the stored execution context, or {@code null} if none has been set
     */
    public HttpClientContext getHttpClientContext() {
        return this.httpClientContext;
    }

    /**
     * @param httpClientContext the execution context to store
     */
    public void setHttpClientContext(HttpClientContext httpClientContext) {
        this.httpClientContext = httpClientContext;
    }
}
package us.codecraft.webmagic.downloader; package us.codecraft.webmagic.downloader;
import org.apache.commons.collections.CollectionUtils;
import org.apache.http.Header;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.apache.http.auth.AuthState;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig; import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder; import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.ByteArrayEntity; import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicCookieStore;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.proxy.Proxy;
...@@ -20,7 +29,29 @@ import java.util.Map; ...@@ -20,7 +29,29 @@ import java.util.Map;
*/ */
public class HttpUriRequestConverter { public class HttpUriRequestConverter {
public HttpUriRequest convert(Request request, Site site, Proxy proxy) { public HttpClientRequestContext convert(Request request, Site site, Proxy proxy) {
HttpClientRequestContext httpClientRequestContext = new HttpClientRequestContext();
httpClientRequestContext.setHttpUriRequest(convertHttpUriRequest(request, site, proxy));
httpClientRequestContext.setHttpClientContext(convertHttpClientContext(request, site, proxy));
return httpClientRequestContext;
}
/**
 * Builds the {@link HttpClientContext} for one request: proxy credentials
 * (when a proxy with a username is supplied) and the request's own cookies.
 *
 * @param request the request whose cookies, if any, are copied into a fresh cookie store
 * @param site    the site being crawled (not consulted here; kept for signature symmetry
 *                with the other convert helpers)
 * @param proxy   the proxy to route through, or {@code null} when no proxy is in use
 * @return a new context, never {@code null}
 */
private HttpClientContext convertHttpClientContext(Request request, Site site, Proxy proxy) {
    HttpClientContext httpContext = new HttpClientContext();

    // The caller passes null when no proxy provider is configured, so the
    // proxy-auth state must only be built when there is a proxy that actually
    // carries a username; otherwise dereferencing it throws a NullPointerException.
    if (proxy != null && proxy.getUsername() != null) {
        AuthState authState = new AuthState();
        authState.update(new BasicScheme(),
                new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
        httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState);
    }

    // CollectionUtils.isNotEmpty is already null-safe, so no separate null check is needed.
    if (CollectionUtils.isNotEmpty(request.getCookies())) {
        CookieStore cookieStore = new BasicCookieStore();
        for (Cookie c : request.getCookies()) {
            cookieStore.addCookie(c);
        }
        httpContext.setCookieStore(cookieStore);
    }
    return httpContext;
}
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl()); RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
if (site.getHeaders() != null) { if (site.getHeaders() != null) {
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) { for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
...@@ -40,7 +71,13 @@ public class HttpUriRequestConverter { ...@@ -40,7 +71,13 @@ public class HttpUriRequestConverter {
requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort())); requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort()));
} }
requestBuilder.setConfig(requestConfigBuilder.build()); requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build(); HttpUriRequest httpUriRequest = requestBuilder.build();
if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) {
for (Header h : request.getHeaders()) {
httpUriRequest.setHeader(h);
}
}
return httpUriRequest;
} }
private RequestBuilder selectRequestMethod(Request request) { private RequestBuilder selectRequestMethod(Request request) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment