Commit 6a87a778 authored by yihua.huang's avatar yihua.huang

add selenium download timeout

parent 18fefa0c
......@@ -20,18 +20,22 @@ import java.util.Map;
/**
* 使用Selenium调用浏览器进行渲染。目前仅支持chrome。<br>
* 需要下载Selenium driver支持。<br>
*
* @author yihua.huang@dianping.com <br>
* @date: 13-7-26 <br>
* Time: 下午1:37 <br>
*/
public class SeleniumDownloader implements Downloader,Destroyable {
public class SeleniumDownloader implements Downloader, Destroyable {
private WebDriverPool webDriverPool;
private Logger logger = Logger.getLogger(getClass());
private int sleepTime = 0;
/**
* 新建
*
* @param chromeDriverPath
*/
public SeleniumDownloader(String chromeDriverPath) {
......@@ -44,6 +48,16 @@ public class SeleniumDownloader implements Downloader,Destroyable {
webDriverPool = new WebDriverPool(poolSize);
}
/**
 * Sets how long {@code download} pauses after asking the browser to load a
 * page and before reading the rendered HTML. The value is handed to
 * {@link Thread#sleep(long)}, so it is expressed in milliseconds; the field
 * defaults to {@code 0}.
 *
 * @param sleepTime pause in milliseconds; must not be negative
 * @return this downloader, so calls can be chained
 * @throws IllegalArgumentException if {@code sleepTime} is negative
 */
public SeleniumDownloader setSleepTime(int sleepTime) {
    // Thread.sleep rejects negative values; fail here, at configuration time,
    // instead of in the middle of a download.
    if (sleepTime < 0) {
        throw new IllegalArgumentException("sleepTime must not be negative: " + sleepTime);
    }
    this.sleepTime = sleepTime;
    return this;
}
@Override
public Page download(Request request, Task task) {
WebDriver webDriver;
......@@ -55,6 +69,11 @@ public class SeleniumDownloader implements Downloader,Destroyable {
}
logger.info("downloading page " + request.getUrl());
webDriver.get(request.getUrl());
// Pause for the configured sleepTime (milliseconds) after the page load was
// requested and before the page is read.
try {
    Thread.sleep(sleepTime);
} catch (InterruptedException e) {
    // Re-assert the interrupt so callers further up the stack can still
    // observe it, and report through the class logger instead of stderr.
    Thread.currentThread().interrupt();
    logger.warn("interrupted while waiting for page " + request.getUrl(), e);
}
WebDriver.Options manage = webDriver.manage();
Site site = task.getSite();
if (site.getCookies() != null) {
......@@ -65,6 +84,7 @@ public class SeleniumDownloader implements Downloader,Destroyable {
}
WebElement webElement = webDriver.findElement(By.xpath("/html"));
String content = webElement.getAttribute("outerHTML");
//
Page page = new Page();
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl()));
......
......@@ -14,7 +14,7 @@ import us.codecraft.webmagic.Task;
*/
public class SeleniumDownloaderTest {
private String chromeDriverPath = "";
private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
@Ignore("need chrome driver")
@Test
......@@ -37,4 +37,5 @@ public class SeleniumDownloaderTest {
}
System.out.println(System.currentTimeMillis() - time1);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment