Commit fcb09f2e authored by yihua.huang's avatar yihua.huang

invite selenium

parent 74bd74a0
package us.codecraft.webmagic.selenium.downloader;
import org.apache.log4j.Logger;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils;
/**
 * {@link Downloader} that fetches a page by loading it in a browser obtained from a
 * {@link WebDriverPool} and capturing the rendered {@code /html} element.
 *
 * <p>Drivers are borrowed from the pool for the duration of a single download and
 * handed back afterwards. This class never calls {@code closeAll()} on the pool.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-7-26 <br>
 * Time: 1:37 PM <br>
 */
public class SeleniumDownloader implements Downloader {

    /** Pool the browser instances are borrowed from; created together with the downloader. */
    private final WebDriverPool webDriverPool;

    private Logger logger = Logger.getLogger(getClass());

    /**
     * Creates a downloader backed by a default-capacity {@link WebDriverPool}.
     *
     * @param chromeDriverPath value stored in the {@code webdriver.chrome.driver} system property
     */
    public SeleniumDownloader(String chromeDriverPath) {
        System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
        // The pool was previously never assigned, so download() failed with a NullPointerException.
        this.webDriverPool = new WebDriverPool();
    }

    /**
     * Loads {@code request.getUrl()} in a pooled browser and wraps the resulting markup in a {@link Page}.
     *
     * @param request request whose URL is loaded
     * @param task    not used by this implementation
     * @return the populated page, or {@code null} if the thread was interrupted while
     *         waiting for a free driver (the interrupt status is restored in that case)
     */
    @Override
    public Page download(Request request, Task task) {
        WebDriver webDriver;
        try {
            webDriver = webDriverPool.get();
        } catch (InterruptedException e) {
            logger.warn("interrupted", e);
            // Keep the interruption visible to the caller instead of silently clearing it.
            Thread.currentThread().interrupt();
            return null;
        }
        try {
            webDriver.get(request.getUrl());
            WebElement webElement = webDriver.findElement(By.xpath("/html"));
            String content = webElement.getAttribute("outerHTML");
            Page page = new Page();
            // NOTE(review): fixAllRelativeHrefs presumably resolves relative links against the request URL - confirm.
            page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
            page.setUrl(new PlainText(request.getUrl()));
            page.setRequest(request);
            return page;
        } finally {
            // Always hand the driver back; otherwise the pool is exhausted after
            // "capacity" downloads and every later call blocks forever in get().
            webDriverPool.returnToPool(webDriver);
        }
    }
}
package us.codecraft.webmagic.selenium.downloader;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.BlockingDeque;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Bounded pool of {@link ChromeDriver} instances.
 *
 * <p>Drivers are created lazily, up to {@code capacity}. Idle drivers wait in a blocking
 * queue; every driver ever created is additionally tracked so that {@link #closeAll()}
 * can shut all of them down, including those currently borrowed.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-7-26 <br>
 * Time: 1:41 PM <br>
 */
class WebDriverPool {

    private final static int DEFAULT_CAPACITY = 5;

    private final static int STAT_RUNNING = 1;

    private final static int STAT_CLOSED = 2;

    /** Maximum number of drivers this pool will create. */
    private final int capacity;

    /** Life-cycle state: {@code STAT_RUNNING} until {@link #closeAll()} succeeds. */
    private final AtomicInteger stat = new AtomicInteger(STAT_RUNNING);

    /** Every driver created by this pool, whether idle or borrowed. */
    private final List<WebDriver> webDriverList = Collections.synchronizedList(new ArrayList<WebDriver>());

    /** Drivers that are currently idle and ready to be borrowed. */
    private final BlockingDeque<WebDriver> innerQueue = new LinkedBlockingDeque<WebDriver>();

    /**
     * @param capacity maximum number of drivers to create
     */
    public WebDriverPool(int capacity) {
        this.capacity = capacity;
    }

    /** Creates a pool with the default capacity of {@value #DEFAULT_CAPACITY}. */
    public WebDriverPool() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Borrows a driver: an idle one if available, otherwise a new one while the pool is
     * below capacity, otherwise blocks until a driver is returned.
     *
     * @return a driver the caller owns until it passes it to {@link #returnToPool(WebDriver)}
     * @throws InterruptedException  if interrupted while waiting for a driver to be returned
     * @throws IllegalStateException if the pool has been closed
     */
    public WebDriver get() throws InterruptedException {
        checkRunning();
        WebDriver idle = innerQueue.poll();
        if (idle != null) {
            return idle;
        }
        if (webDriverList.size() < capacity) {
            // Re-check under the list's lock so concurrent callers cannot exceed capacity.
            synchronized (webDriverList) {
                if (webDriverList.size() < capacity) {
                    WebDriver created = new ChromeDriver();
                    webDriverList.add(created);
                    // Hand the new driver straight to its creator. Queuing it first let another
                    // thread take it, leaving the creator blocked in take().
                    return created;
                }
            }
        }
        return innerQueue.take();
    }

    /**
     * Puts a previously borrowed driver back into the idle queue.
     *
     * @param webDriver driver to make available again
     * @throws IllegalStateException if the pool has been closed
     */
    public void returnToPool(WebDriver webDriver) {
        checkRunning();
        innerQueue.add(webDriver);
    }

    /**
     * @throws IllegalStateException if the pool is no longer in the running state
     */
    protected void checkRunning() {
        if (stat.get() != STAT_RUNNING) {
            throw new IllegalStateException("Already closed!");
        }
    }

    /**
     * Marks the pool closed and quits every driver it created.
     *
     * <p>All drivers are attempted even if one fails; the first failure is rethrown afterwards.
     *
     * @throws IllegalStateException if the pool was already closed
     */
    public void closeAll() {
        if (!stat.compareAndSet(STAT_RUNNING, STAT_CLOSED)) {
            throw new IllegalStateException("Already closed!");
        }
        RuntimeException firstFailure = null;
        // Iterating a Collections.synchronizedList requires holding the list's own lock.
        synchronized (webDriverList) {
            for (WebDriver webDriver : webDriverList) {
                try {
                    // quit() ends the whole session; close() only closes the current window.
                    webDriver.quit();
                } catch (RuntimeException e) {
                    if (firstFailure == null) {
                        firstFailure = e;
                    }
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}
package us.codecraft.webmagic.selenium;
import org.junit.Test;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import java.util.List;
/**
 * Manual smoke test: opens a page in Chrome through Selenium and prints the rendered markup.
 *
 * <p>Requires a local chromedriver binary at the hard-coded path below.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-7-26 <br>
 * Time: 12:27 PM <br>
 */
public class SeleniumTest {

    /** Loads the page, dumps the {@code outerHTML} of each {@code /html} match to stdout. */
    @Test
    public void test() {
        System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver");
        WebDriver webDriver = new ChromeDriver();
        try {
            webDriver.get("http://huaban.com/");
            List<WebElement> elements = webDriver.findElements(By.xpath("/html"));
            for (WebElement element : elements) {
                System.out.println(element.getAttribute("outerHTML"));
            }
        } finally {
            // Shut the browser down even when navigation or lookup throws;
            // quit() ends the whole session rather than just the current window.
            webDriver.quit();
        }
    }
}
package us.codecraft.webmagic.selenium.downloader;
import org.junit.Test;
import org.openqa.selenium.WebDriver;
/**
 * Manual smoke test for {@link WebDriverPool}: borrows as many drivers as the pool's capacity.
 *
 * <p>Requires a local chromedriver binary at the hard-coded path below.
 *
 * @author yihua.huang@dianping.com <br>
 * @date: 13-7-26 <br>
 * Time: 2:12 PM <br>
 */
public class WebDriverPoolTest {

    /** Borrows five drivers from a pool of capacity five, then closes the pool. */
    @Test
    public void test() {
        String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
        System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
        WebDriverPool webDriverPool = new WebDriverPool(5);
        try {
            for (int i = 0; i < 5; i++) {
                webDriverPool.get();
                System.out.println(i);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt status and fail, rather than printing and carrying on.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while waiting for a WebDriver", e);
        } finally {
            // Release the browsers even if acquiring one of them failed.
            webDriverPool.closeAll();
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment