Commit e87aabf8 authored by yihua.huang's avatar yihua.huang

为downloader增加了一个新方法,可设置线程数

parent 6a87a778
......@@ -58,6 +58,8 @@ public class Spider implements Runnable, Task {
private ExecutorService executorService;
private int threadNum = 1;
private AtomicInteger stat = new AtomicInteger(STAT_INIT);
private final static int STAT_INIT = 0;
......@@ -144,6 +146,10 @@ public class Spider implements Runnable, Task {
if (downloader == null) {
this.downloader = new HttpClientDownloader();
}
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline());
}
downloader.setThread(threadNum);
}
@Override
......@@ -158,9 +164,6 @@ public class Spider implements Runnable, Task {
}
}
Request request = scheduler.poll(this);
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline());
}
//single thread
if (executorService == null) {
while (request != null) {
......@@ -211,9 +214,9 @@ public class Spider implements Runnable, Task {
}
}
/**
 * Calls {@code destroy()} on the given object when it implements {@code Destroyable};
 * any other object is left untouched.
 *
 * @param object candidate to destroy
 */
private void destroyEach(Object object){
private void destroyEach(Object object) {
if (object instanceof Destroyable) {
((Destroyable)object).destroy();
((Destroyable) object).destroy();
}
}
......@@ -267,12 +270,10 @@ public class Spider implements Runnable, Task {
*/
public Spider thread(int threadNum) {
checkIfNotRunning();
this.threadNum = threadNum;
if (threadNum <= 0) {
throw new IllegalArgumentException("threadNum should be more than one!");
}
if (downloader==null || downloader instanceof HttpClientDownloader){
downloader = new HttpClientDownloader(threadNum);
}
if (threadNum == 1) {
return this;
}
......
......@@ -6,9 +6,10 @@ import us.codecraft.webmagic.Task;
/**
* Downloader是webmagic下载页面的接口。webmagic默认使用了HttpComponent作为下载器,一般情况,你无需自己实现这个接口。<br>
*
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 下午12:14
* Date: 13-4-21
* Time: 下午12:14
*/
public interface Downloader {
......@@ -20,4 +21,12 @@ public interface Downloader {
* @return page
*/
public Page download(Request request, Task task);
/**
 * Sets the number of threads. A multi-threaded crawl generally needs support from the Downloader.<br>
 * Implementations that do not care about multi-threading need not do anything in this method.<br>
 *
 * @param thread number of threads
 */
public void setThread(int thread);
}
......@@ -67,6 +67,11 @@ public class FileDownloader implements Downloader {
return page;
}
/**
 * Accepts the thread count required by the {@code Downloader} interface and ignores it.
 *
 * @param thread number of threads (unused)
 */
@Override
public void setThread(int thread) {
// No-op: the value is neither stored nor used here.
}
private String getHtml(BufferedReader bufferedReader) throws IOException {
String line;
StringBuilder htmlBuilder= new StringBuilder();
......
......@@ -32,14 +32,6 @@ public class HttpClientDownloader implements Downloader {
private int poolSize;
/**
 * Creates a downloader and stores the given value in the {@code poolSize} field.
 *
 * @param poolSize pool size to remember (presumably the HTTP connection pool size;
 *                 confirm where the field is read)
 */
public HttpClientDownloader(int poolSize) {
this.poolSize = poolSize;
}
/**
 * Creates a downloader with a pool size of 5 by delegating to the {@code int} constructor.
 */
public HttpClientDownloader() {
this(5);
}
@Override
public Page download(Request request, Task task) {
Site site = task.getSite();
......@@ -90,6 +82,11 @@ public class HttpClientDownloader implements Downloader {
return null;
}
/**
 * Stores the requested thread count as this downloader's pool size.
 *
 * @param thread number of threads; copied into {@code poolSize} without validation
 */
@Override
public void setThread(int thread) {
    this.poolSize = thread;
}
private void handleGzip(HttpResponse httpResponse) {
Header ceheader = httpResponse.getEntity().getContentEncoding();
if (ceheader != null) {
......
......@@ -27,12 +27,14 @@ import java.util.Map;
*/
public class SeleniumDownloader implements Downloader, Destroyable {
private WebDriverPool webDriverPool;
private volatile WebDriverPool webDriverPool;
private Logger logger = Logger.getLogger(getClass());
private int sleepTime = 0;
private int poolSize = 1;
/**
* 新建
*
......@@ -40,16 +42,11 @@ public class SeleniumDownloader implements Downloader, Destroyable {
*/
public SeleniumDownloader(String chromeDriverPath) {
// Publish the chromedriver path as the JVM-wide "webdriver.chrome.driver" system property.
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
// Build the driver pool right away with WebDriverPool's no-argument constructor.
webDriverPool = new WebDriverPool();
}
/**
 * Creates a downloader whose driver pool is built immediately with the given size.
 *
 * @param chromeDriverPath value stored in the "webdriver.chrome.driver" system property
 * @param poolSize         argument passed to the {@code WebDriverPool} constructor
 */
public SeleniumDownloader(String chromeDriverPath, int poolSize) {
// Publish the chromedriver path as the JVM-wide "webdriver.chrome.driver" system property.
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
webDriverPool = new WebDriverPool(poolSize);
}
/**
* set sleep time to wait until load success
*
* @param sleepTime
* @return this
*/
......@@ -60,6 +57,7 @@ public class SeleniumDownloader implements Downloader, Destroyable {
@Override
public Page download(Request request, Task task) {
checkInit();
WebDriver webDriver;
try {
webDriver = webDriverPool.get();
......@@ -93,6 +91,19 @@ public class SeleniumDownloader implements Downloader, Destroyable {
return page;
}
/**
 * Lazily creates the {@code WebDriverPool} on first use, sized by the current {@code poolSize}.
 * <p>
 * Uses double-checked locking: the unsynchronized check keeps the common path cheap once the
 * pool exists, and the second check under the lock guarantees that only one pool is ever
 * built. Without the inner check, two threads that both observe {@code null} would each
 * construct a pool in turn and the first one would be overwritten without being closed.
 * This relies on the {@code webDriverPool} field being declared {@code volatile}.
 */
private void checkInit() {
    if (webDriverPool == null) {
        synchronized (this) {
            // Re-check under the lock: another thread may have created the pool
            // between our first check and our acquiring the monitor.
            if (webDriverPool == null) {
                webDriverPool = new WebDriverPool(poolSize);
            }
        }
    }
}
@Override
public void setThread(int thread) {
this.poolSize = thread;
}
@Override
public void destroy() {
webDriverPool.closeAll();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment