Commit 067f3ea0 authored by yihua.huang

Add null pointer checks for HttpClientDownloader

parent 6c61c547
...@@ -17,6 +17,8 @@ import us.codecraft.webmagic.selector.PlainText; ...@@ -17,6 +17,8 @@ import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
/** /**
...@@ -34,10 +36,23 @@ public class HttpClientDownloader implements Downloader { ...@@ -34,10 +36,23 @@ public class HttpClientDownloader implements Downloader {
@Override @Override
public Page download(Request request, Task task) { public Page download(Request request, Task task) {
Site site = task.getSite(); Site site = null;
if (task != null) {
site = task.getSite();
}
int retryTimes = 0;
Set<Integer> acceptStatCode;
String charset = null;
if (site != null) {
retryTimes = site.getRetryTimes();
acceptStatCode = site.getAcceptStatCode();
charset = site.getCharset();
} else {
acceptStatCode = new HashSet<Integer>();
acceptStatCode.add(200);
}
logger.info("downloading page " + request.getUrl()); logger.info("downloading page " + request.getUrl());
HttpClient httpClient = HttpClientPool.getInstance(poolSize).getClient(site); HttpClient httpClient = HttpClientPool.getInstance(poolSize).getClient(site);
String charset = site.getCharset();
try { try {
HttpGet httpGet = new HttpGet(request.getUrl()); HttpGet httpGet = new HttpGet(request.getUrl());
HttpResponse httpResponse = null; HttpResponse httpResponse = null;
...@@ -49,7 +64,8 @@ public class HttpClientDownloader implements Downloader { ...@@ -49,7 +64,8 @@ public class HttpClientDownloader implements Downloader {
retry = false; retry = false;
} catch (IOException e) { } catch (IOException e) {
tried++; tried++;
if (tried > site.getRetryTimes()) {
if (tried > retryTimes) {
logger.warn("download page " + request.getUrl() + " error", e); logger.warn("download page " + request.getUrl() + " error", e);
return null; return null;
} }
...@@ -58,7 +74,7 @@ public class HttpClientDownloader implements Downloader { ...@@ -58,7 +74,7 @@ public class HttpClientDownloader implements Downloader {
} }
} while (retry); } while (retry);
int statusCode = httpResponse.getStatusLine().getStatusCode(); int statusCode = httpResponse.getStatusLine().getStatusCode();
if (site.getAcceptStatCode().contains(statusCode)) { if (acceptStatCode.contains(statusCode)) {
//charset //charset
if (charset == null) { if (charset == null) {
String value = httpResponse.getEntity().getContentType().getValue(); String value = httpResponse.getEntity().getContentType().getValue();
...@@ -66,7 +82,7 @@ public class HttpClientDownloader implements Downloader { ...@@ -66,7 +82,7 @@ public class HttpClientDownloader implements Downloader {
} }
// //
handleGzip(httpResponse); handleGzip(httpResponse);
return handleResponse(request, charset, httpResponse,task); return handleResponse(request, charset, httpResponse, task);
} else { } else {
logger.warn("code error " + statusCode + "\t" + request.getUrl()); logger.warn("code error " + statusCode + "\t" + request.getUrl());
} }
...@@ -76,7 +92,7 @@ public class HttpClientDownloader implements Downloader { ...@@ -76,7 +92,7 @@ public class HttpClientDownloader implements Downloader {
return null; return null;
} }
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse,Task task) throws IOException { protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
String content = IOUtils.toString(httpResponse.getEntity().getContent(), String content = IOUtils.toString(httpResponse.getEntity().getContent(),
charset); charset);
Page page = new Page(); Page page = new Page();
...@@ -88,7 +104,7 @@ public class HttpClientDownloader implements Downloader { ...@@ -88,7 +104,7 @@ public class HttpClientDownloader implements Downloader {
@Override @Override
public void setThread(int thread) { public void setThread(int thread) {
poolSize=thread; poolSize = thread;
} }
private void handleGzip(HttpResponse httpResponse) { private void handleGzip(HttpResponse httpResponse) {
......
...@@ -50,24 +50,30 @@ public class HttpClientPool { ...@@ -50,24 +50,30 @@ public class HttpClientPool {
private HttpClient generateClient(Site site) { private HttpClient generateClient(Site site) {
HttpParams params = new BasicHttpParams(); HttpParams params = new BasicHttpParams();
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent()); if (site != null && site.getUserAgent() != null) {
params.setParameter(CoreProtocolPNames.USER_AGENT, site.getUserAgent());
}
params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000); params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000);
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000); params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 2000);
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params); HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
paramsBean.setVersion(HttpVersion.HTTP_1_1); paramsBean.setVersion(HttpVersion.HTTP_1_1);
paramsBean.setContentCharset(site.getCharset()); if (site != null && site.getCharset() != null) {
paramsBean.setContentCharset(site.getCharset());
}
paramsBean.setUseExpectContinue(false); paramsBean.setUseExpectContinue(false);
SchemeRegistry schemeRegistry = new SchemeRegistry(); SchemeRegistry schemeRegistry = new SchemeRegistry();
schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory())); schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry); PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager(schemeRegistry);
connectionManager.setMaxTotal(poolSize); connectionManager.setMaxTotal(poolSize);
connectionManager.setDefaultMaxPerRoute(100); connectionManager.setDefaultMaxPerRoute(100);
DefaultHttpClient httpClient = new DefaultHttpClient(connectionManager, params); DefaultHttpClient httpClient = new DefaultHttpClient(connectionManager, params);
generateCookie(httpClient, site); if (site != null) {
generateCookie(httpClient, site);
}
httpClient.getParams().setIntParameter("http.socket.timeout", 60000); httpClient.getParams().setIntParameter("http.socket.timeout", 60000);
httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH); httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH);
return httpClient; return httpClient;
...@@ -75,10 +81,12 @@ public class HttpClientPool { ...@@ -75,10 +81,12 @@ public class HttpClientPool {
private void generateCookie(DefaultHttpClient httpClient, Site site) { private void generateCookie(DefaultHttpClient httpClient, Site site) {
CookieStore cookieStore = new BasicCookieStore(); CookieStore cookieStore = new BasicCookieStore();
for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) { if (site.getCookies() != null) {
BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue()); for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
cookie.setDomain(site.getDomain()); BasicClientCookie cookie = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
cookieStore.addCookie(cookie); cookie.setDomain(site.getDomain());
cookieStore.addCookie(cookie);
}
} }
httpClient.setCookieStore(cookieStore); httpClient.setCookieStore(cookieStore);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment