Commit 6a87a778 authored by yihua.huang

add selenium download timeout

parent 18fefa0c
...@@ -20,18 +20,22 @@ import java.util.Map; ...@@ -20,18 +20,22 @@ import java.util.Map;
/** /**
* 使用Selenium调用浏览器进行渲染。目前仅支持chrome。<br> * 使用Selenium调用浏览器进行渲染。目前仅支持chrome。<br>
* 需要下载Selenium driver支持。<br> * 需要下载Selenium driver支持。<br>
*
* @author yihua.huang@dianping.com <br> * @author yihua.huang@dianping.com <br>
* @date: 13-7-26 <br> * @date: 13-7-26 <br>
* Time: 下午1:37 <br> * Time: 下午1:37 <br>
*/ */
public class SeleniumDownloader implements Downloader,Destroyable { public class SeleniumDownloader implements Downloader, Destroyable {
private WebDriverPool webDriverPool; private WebDriverPool webDriverPool;
private Logger logger = Logger.getLogger(getClass()); private Logger logger = Logger.getLogger(getClass());
private int sleepTime = 0;
/** /**
* 新建 * 新建
*
* @param chromeDriverPath * @param chromeDriverPath
*/ */
public SeleniumDownloader(String chromeDriverPath) { public SeleniumDownloader(String chromeDriverPath) {
...@@ -44,6 +48,16 @@ public class SeleniumDownloader implements Downloader,Destroyable { ...@@ -44,6 +48,16 @@ public class SeleniumDownloader implements Downloader,Destroyable {
webDriverPool = new WebDriverPool(poolSize); webDriverPool = new WebDriverPool(poolSize);
} }
/**
 * Sets how long {@code download} pauses after asking the browser to load a page,
 * so the page has time to finish rendering before its HTML is read.
 *
 * @param sleepTime pause in milliseconds; must not be negative
 * @return this downloader, so calls can be chained
 * @throws IllegalArgumentException if {@code sleepTime} is negative
 */
public SeleniumDownloader setSleepTime(int sleepTime) {
    // Fail fast: Thread.sleep rejects negative values, which would otherwise
    // surface as an exception on every single download.
    if (sleepTime < 0) {
        throw new IllegalArgumentException("sleepTime must not be negative: " + sleepTime);
    }
    this.sleepTime = sleepTime;
    return this;
}
@Override @Override
public Page download(Request request, Task task) { public Page download(Request request, Task task) {
WebDriver webDriver; WebDriver webDriver;
...@@ -55,6 +69,11 @@ public class SeleniumDownloader implements Downloader,Destroyable { ...@@ -55,6 +69,11 @@ public class SeleniumDownloader implements Downloader,Destroyable {
} }
logger.info("downloading page " + request.getUrl()); logger.info("downloading page " + request.getUrl());
webDriver.get(request.getUrl()); webDriver.get(request.getUrl());
try {
    // Give the page time to finish rendering before its HTML is read.
    Thread.sleep(sleepTime);
} catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still
    // observe the interruption, and report it through the class logger.
    Thread.currentThread().interrupt();
    logger.warn("interrupted while waiting for page " + request.getUrl(), e);
}
WebDriver.Options manage = webDriver.manage(); WebDriver.Options manage = webDriver.manage();
Site site = task.getSite(); Site site = task.getSite();
if (site.getCookies() != null) { if (site.getCookies() != null) {
...@@ -65,6 +84,7 @@ public class SeleniumDownloader implements Downloader,Destroyable { ...@@ -65,6 +84,7 @@ public class SeleniumDownloader implements Downloader,Destroyable {
} }
WebElement webElement = webDriver.findElement(By.xpath("/html")); WebElement webElement = webDriver.findElement(By.xpath("/html"));
String content = webElement.getAttribute("outerHTML"); String content = webElement.getAttribute("outerHTML");
//
Page page = new Page(); Page page = new Page();
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl())); page.setUrl(new PlainText(request.getUrl()));
......
...@@ -14,7 +14,7 @@ import us.codecraft.webmagic.Task; ...@@ -14,7 +14,7 @@ import us.codecraft.webmagic.Task;
*/ */
public class SeleniumDownloaderTest { public class SeleniumDownloaderTest {
private String chromeDriverPath = ""; private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
@Ignore("need chrome driver") @Ignore("need chrome driver")
@Test @Test
...@@ -37,4 +37,5 @@ public class SeleniumDownloaderTest { ...@@ -37,4 +37,5 @@ public class SeleniumDownloaderTest {
} }
System.out.println(System.currentTimeMillis() - time1); System.out.println(System.currentTimeMillis() - time1);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment