Commit 86a20eab authored by yihua.huang

Fix an HttpClient pool size bug

parent fcb09f2e
......@@ -39,7 +39,7 @@ import java.util.concurrent.atomic.AtomicInteger;
*/
public class Spider implements Runnable, Task {
private Downloader downloader = new HttpClientDownloader();
private Downloader downloader;
private List<Pipeline> pipelines = new ArrayList<Pipeline>();
......@@ -139,12 +139,18 @@ public class Spider implements Runnable, Task {
return this;
}
/**
 * Fills in defaults for components that have not been configured.
 * Invoked from run() before crawling starts: when no downloader has been
 * assigned, a default-constructed HttpClientDownloader is installed;
 * an already-assigned downloader is left untouched.
 */
protected void checkComponent() {
    if (downloader != null) {
        // A downloader is already present; nothing to default.
        return;
    }
    downloader = new HttpClientDownloader();
}
@Override
public void run() {
if (!stat.compareAndSet(STAT_INIT, STAT_RUNNING)) {
throw new IllegalStateException("Spider is already running!");
}
checkComponent();
if (startUrls != null) {
for (String startUrl : startUrls) {
scheduler.push(new Request(startUrl), this);
......@@ -247,6 +253,7 @@ public class Spider implements Runnable, Task {
if (threadNum <= 0) {
throw new IllegalArgumentException("threadNum should be more than one!");
}
downloader = new HttpClientDownloader(threadNum);
if (threadNum == 1) {
return this;
}
......
......@@ -21,6 +21,7 @@ import java.io.IOException;
/**
* 封装了HttpClient的下载器。已实现指定次数重试、处理gzip、自定义UA/cookie等功能。<br>
*
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 下午12:15
......@@ -29,11 +30,21 @@ public class HttpClientDownloader implements Downloader {
private Logger logger = Logger.getLogger(getClass());
private int poolSize;
/**
 * Creates a downloader that remembers the requested pool size.
 * The value is only stored here; download() later passes it to
 * HttpClientPool.getInstance(poolSize) when obtaining a client.
 *
 * @param poolSize pool size to request from HttpClientPool
 */
public HttpClientDownloader(int poolSize) {
this.poolSize = poolSize;
}
/**
 * Creates a downloader with a pool size of 5 by delegating to
 * HttpClientDownloader(int).
 */
public HttpClientDownloader() {
this(5);
}
@Override
public Page download(Request request, Task task) {
Site site = task.getSite();
logger.info("downloading page " + request.getUrl());
HttpClient httpClient = HttpClientPool.getInstance().getClient(site);
HttpClient httpClient = HttpClientPool.getInstance(poolSize).getClient(site);
String charset = site.getCharset();
try {
HttpGet httpGet = new HttpGet(request.getUrl());
......@@ -50,7 +61,7 @@ public class HttpClientDownloader implements Downloader {
logger.warn("download page " + request.getUrl() + " error", e);
return null;
}
logger.info("download page " + request.getUrl() + " error, retry the "+tried+" time!");
logger.info("download page " + request.getUrl() + " error, retry the " + tried + " time!");
retry = true;
}
} while (retry);
......
......@@ -24,9 +24,16 @@ import java.util.Map;
*/
public class HttpClientPool {
public static final HttpClientPool INSTANCE = new HttpClientPool(5);
public static volatile HttpClientPool INSTANCE;
public static HttpClientPool getInstance() {
/**
 * Returns the shared HttpClientPool, creating it lazily on first use.
 * Creation is guarded by a lock on HttpClientPool.class with a second
 * null check inside the lock, so only one thread constructs the pool.
 *
 * Note that poolSize is consulted only when the shared instance does not
 * exist yet; once INSTANCE has been set, the argument of later calls is
 * ignored and the existing pool is returned as-is.
 *
 * @param poolSize size used to construct the pool if it has not been created
 * @return the shared pool instance
 */
public static HttpClientPool getInstance(int poolSize) {
    // Fast path: one volatile read when the pool already exists.
    HttpClientPool existing = INSTANCE;
    if (existing != null) {
        return existing;
    }
    synchronized (HttpClientPool.class) {
        // Re-check under the lock; another thread may have created it meanwhile.
        if (INSTANCE == null) {
            INSTANCE = new HttpClientPool(poolSize);
        }
        return INSTANCE;
    }
}
......@@ -48,7 +55,7 @@ public class HttpClientPool {
HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
paramsBean.setVersion(HttpVersion.HTTP_1_1);
paramsBean.setContentCharset("UTF-8");
paramsBean.setContentCharset(site.getCharset());
paramsBean.setUseExpectContinue(false);
SchemeRegistry schemeRegistry = new SchemeRegistry();
......
......@@ -27,8 +27,8 @@ public class GlobalProcessor implements PageProcessor {
@Override
public Site getSite() {
if (site==null){
site = Site.me().setDomain("www.2345.com")
if (site == null) {
site = Site.me().setDomain("www.2345.com").setSleepTime(0)
.addStartUrl("http://www.2345.com/").addStartUrl("http://hao.360.cn/")
.addStartUrl("http://www.baidu.com/s?wd=%E7%BD%91%E7%AB%99%E5%AF%BC%E8%88%AA&rsv_spt=1&issp=1&rsv_bp=0&ie=utf-8&tn=80039098_oem_dg&rsv_n=2&rsv_sug3=6&rsv_sug4=698&rsv_sug=0&rsv_sug1=3")
.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment