Commit 644a90c2 authored by yihua.huang

complete selenium

parent 86a20eab
......@@ -2,16 +2,20 @@ package us.codecraft.webmagic.selenium.downloader;
import org.apache.log4j.Logger;
import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils;
import java.util.Map;
/**
* @author yihua.huang@dianping.com <br>
* @date: 13-7-26 <br>
......@@ -25,24 +29,40 @@ public class SeleniumDownloader implements Downloader {
/**
 * Creates a downloader backed by a {@code WebDriverPool} built with its no-argument constructor.
 *
 * @param chromeDriverPath value stored in the {@code webdriver.chrome.driver} system property
 */
public SeleniumDownloader(String chromeDriverPath) {
    // The property is JVM-wide, so it is set before the pool is constructed.
    System.setProperty("webdriver.chrome.driver", chromeDriverPath);
    this.webDriverPool = new WebDriverPool();
}
/**
 * Creates a downloader backed by a {@code WebDriverPool} constructed with the given size argument.
 *
 * @param chromeDriverPath value stored in the {@code webdriver.chrome.driver} system property
 * @param poolSize         argument forwarded to {@code WebDriverPool(int)}
 */
public SeleniumDownloader(String chromeDriverPath, int poolSize) {
    // The property is JVM-wide, so it is set before the pool is constructed.
    System.setProperty("webdriver.chrome.driver", chromeDriverPath);
    this.webDriverPool = new WebDriverPool(poolSize);
}
/**
 * Loads the request's URL in a driver taken from the pool and wraps the rendered
 * document in a {@link Page}.
 *
 * <p>After the URL has been loaded, every entry of the site's cookie map (if the map
 * is non-null) is added to the driver. The page content is the {@code outerHTML} of the
 * {@code /html} element, passed through {@code UrlUtils.fixAllRelativeHrefs} with the
 * request URL.
 *
 * @param request the request whose URL is loaded; also attached to the returned page
 * @param task    supplies the {@link Site} whose cookies are added to the driver
 * @return the populated page, or {@code null} if taking a driver from the pool was interrupted
 */
@Override
public Page download(Request request, Task task) {
    WebDriver webDriver;
    try {
        webDriver = webDriverPool.get();
    } catch (InterruptedException e) {
        logger.warn("interrupted", e);
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        return null;
    }
    try {
        webDriver.get(request.getUrl());
        WebDriver.Options manage = webDriver.manage();
        Site site = task.getSite();
        if (site.getCookies() != null) {
            for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
                Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue());
                manage.addCookie(cookie);
            }
        }
        WebElement webElement = webDriver.findElement(By.xpath("/html"));
        String content = webElement.getAttribute("outerHTML");
        Page page = new Page();
        page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
        page.setUrl(new PlainText(request.getUrl()));
        page.setRequest(request);
        return page;
    } finally {
        // Hand the driver back even when navigation or element lookup throws;
        // otherwise each failed download would permanently remove a driver from the pool.
        webDriverPool.returnToPool(webDriver);
    }
}
}
package us.codecraft.webmagic.selenium.downloader;
import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
/**
 * Manual check that runs {@link SeleniumDownloader} against a live site and prints
 * the links it extracts. Ignored by default because it needs a local chromedriver binary.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-7-26 <br>
 * Time: 2:46 PM <br>
 */
public class SeleniumDownloaderTest {

    /** Location of the chromedriver executable on the author's machine. */
    private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";

    /**
     * Downloads the huaban.com front page and prints every link under
     * {@code #waterfall} that matches {@code .*pins.*}.
     */
    @Ignore("need chrome driver")
    @Test
    public void test() {
        SeleniumDownloader downloader = new SeleniumDownloader(chromeDriverPath);
        Request frontPage = new Request("http://huaban.com/");
        // Minimal task: fixed UUID and a default site.
        Task huabanTask = new Task() {
            @Override
            public String getUUID() {
                return "huaban.com";
            }

            @Override
            public Site getSite() {
                return Site.me();
            }
        };
        Page result = downloader.download(frontPage, huabanTask);
        System.out.println(
                result.getHtml()
                        .$("#waterfall")
                        .links()
                        .regex(".*pins.*")
                        .all());
    }
}
......@@ -10,12 +10,13 @@ import org.openqa.selenium.WebDriver;
*/
public class WebDriverPoolTest {
private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
@Test
public void test(){
String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
public void test() {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
WebDriverPool webDriverPool =new WebDriverPool(5);
for (int i=0;i<5;i++){
WebDriverPool webDriverPool = new WebDriverPool(5);
for (int i = 0; i < 5; i++) {
try {
WebDriver webDriver = webDriverPool.get();
System.out.println(i);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment