Commit 6535928d authored by shenjunlin

加上超时机制

parent b051f3e9
......@@ -6,7 +6,7 @@
<version>7</version>
</parent>
<groupId>us.codecraft.duiba</groupId>
<version>0.8.0-SNAPSHOT</version>
<version>0.8.1-SNAPSHOT</version>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<properties>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.8.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.8.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -21,6 +21,7 @@ import us.codecraft.webmagic.selector.PlainText;
import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
......@@ -45,7 +46,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
private Logger logger = LoggerFactory.getLogger(getClass());
private int sleepTime = 20;
private int sleepTime = 100;
private int poolSize = 1;
......@@ -136,6 +137,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
private Page downLoadPage(WebDriver webDriver, Task task, Request request){
logger.info("downloading page " + request.getUrl());
try {
webDriver.manage().timeouts().pageLoadTimeout(10, TimeUnit.SECONDS);
webDriver.get(request.getUrl());
Thread.sleep(sleepTime);
} catch (Exception e) {
......@@ -144,6 +146,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
} else {
logger.error(e.getMessage(), e);
}
handleWebDriverAfterDownload(webDriver);
}
WebDriver.Options manage = webDriver.manage();
......
package us.codecraft.webmagic.downloader;
import io.github.bonigarcia.wdm.*;
import org.apache.commons.lang3.SystemUtils;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.edge.EdgeDriver;
......@@ -11,9 +10,6 @@ import org.openqa.selenium.opera.OperaDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.ProxyProvider;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
......@@ -104,11 +100,12 @@ public class WebDriverPool {
*/
public WebDriver get() throws InterruptedException {
checkRunning();
logger.info("11现在的driver{}", webDriverList.size());
WebDriver poll = innerQueue.poll();
if (poll != null) {
return poll;
}
logger.info("现在的driver{}", webDriverList.size());
logger.info("22现在的driver{}", webDriverList.size());
if (webDriverList.size() < capacity) {
synchronized (webDriverList) {
if (webDriverList.size() < capacity) {
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.8.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -51,29 +51,31 @@ public class WeiboTopSpider implements PageProcessor {
public static void main(String[] args) throws JMException {
// SeleniumDownloader downloader = new SeleniumDownloader();
// downloader.setProxyProvider(new Data5UProxyProvider());
ProxyProvider proxyProvider = SimpleProxyProvider.from(new Proxy("116.233.89.22",8060));
// downloader.setProxyProvider(SimpleProxyProvider.from(new Proxy("116.233.89.22",8060)));
ScheduledExecutorService executorService = Executors.newSingleThreadScheduledExecutor();
// Spider weiboSpider = Spider.create(new WeiboTopSpider())
//// .addUrl("http://s.weibo.com/top/summary").setDownloader(new SeleniumDownloader())
//// .addUrl("http://www.baidu.com","http://www.163.com","http://www.sina.com.cn/","http://www.qq.com/","http://www.iqiyi.com/").setDownloader(new SeleniumDownloader())
// .addUrl("http://www.sina.com.cn/").setDownloader(new SeleniumDownloader())
// .thread(1);
// weiboSpider.start();
for(int i = 0; i< 10 ; i++) {
for (int i =0; i < 2; i++) {
executorService.scheduleAtFixedRate(new Runnable() {
@Override
public void run() {
SeleniumDownloader seleniumDownloader = new SeleniumDownloader();
seleniumDownloader.setProxyProvider(proxyProvider);
// SeleniumDownloader seleniumDownloader = new SeleniumDownloader();
// seleniumDownloader.setProxyProvider(proxyProvider);
Spider weiboSpider = Spider.create(new WeiboTopSpider())
.addUrl("http://s.weibo.com/top/summary").setDownloader(seleniumDownloader)
.addUrl("http://s.weibo.com/top/summary").setDownloader(new SeleniumDownloader())
// .addUrl("http://127.1.1.1:9090/").setDownloader(new SeleniumDownloader())
.thread(1);
weiboSpider.start();
}
}, 1, 30, TimeUnit.SECONDS);
}, 1, 60, TimeUnit.SECONDS);
}
}
}
}
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.8.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.0-SNAPSHOT</version>
<version>0.8.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment