Commit 2c97dd90 authored by yihua.huang

fix redisScheduler thread problem

parent 95ba7846
...@@ -7,13 +7,17 @@ import us.codecraft.webmagic.Request; ...@@ -7,13 +7,17 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.schedular.Scheduler; import us.codecraft.webmagic.schedular.Scheduler;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/** /**
* 使用redis管理url,构建一个分布式的爬虫。<br> * 使用redis管理url,构建一个分布式的爬虫。<br>
*
* @author yihua.huang@dianping.com <br> * @author yihua.huang@dianping.com <br>
* @date: 13-7-25 <br> * @date: 13-7-25 <br>
* Time: 上午7:07 <br> * Time: 上午7:07 <br>
*/ */
public class RedisScheduler implements Scheduler{ public class RedisScheduler implements Scheduler {
private JedisPool pool; private JedisPool pool;
...@@ -21,7 +25,11 @@ public class RedisScheduler implements Scheduler{ ...@@ -21,7 +25,11 @@ public class RedisScheduler implements Scheduler{
private static final String SET_PREFIX = "set_"; private static final String SET_PREFIX = "set_";
public RedisScheduler(String host){ private ReentrantLock lock = new ReentrantLock();
private Condition condition = lock.newCondition();
/**
 * Creates a scheduler whose Redis connections come from a {@code JedisPool}
 * built with a default {@code JedisPoolConfig}.
 *
 * @param host host value handed to the {@code JedisPool} constructor
 */
public RedisScheduler(String host) {
    this.pool = new JedisPool(new JedisPoolConfig(), host);
}
...@@ -29,10 +37,16 @@ public class RedisScheduler implements Scheduler{ ...@@ -29,10 +37,16 @@ public class RedisScheduler implements Scheduler{
public synchronized void push(Request request, Task task) { public synchronized void push(Request request, Task task) {
Jedis jedis = pool.getResource(); Jedis jedis = pool.getResource();
//使用SortedSet进行url去重 //使用SortedSet进行url去重
if (jedis.zrank(SET_PREFIX+task.getUUID(),request.getUrl())==null){ if (jedis.zrank(SET_PREFIX + task.getUUID(), request.getUrl()) == null) {
//使用List保存队列 try {
jedis.rpush(QUEUE_PREFIX+task.getUUID(),request.getUrl()); lock.lock();
jedis.zadd(SET_PREFIX+task.getUUID(),System.currentTimeMillis(),request.getUrl()); //使用List保存队列
jedis.rpush(QUEUE_PREFIX + task.getUUID(), request.getUrl());
jedis.zadd(SET_PREFIX + task.getUUID(), System.currentTimeMillis(), request.getUrl());
condition.signal();
} finally {
lock.unlock();
}
} }
pool.returnResource(jedis); pool.returnResource(jedis);
} }
...@@ -40,7 +54,21 @@ public class RedisScheduler implements Scheduler{ ...@@ -40,7 +54,21 @@ public class RedisScheduler implements Scheduler{
@Override @Override
public synchronized Request poll(Task task) { public synchronized Request poll(Task task) {
Jedis jedis = pool.getResource(); Jedis jedis = pool.getResource();
String url = jedis.lpop(QUEUE_PREFIX+task.getUUID()); String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
if (url == null) {
try {
lock.lock();
while (url == null) {
try {
condition.await();
url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
} catch (InterruptedException e) {
}
}
} finally {
lock.unlock();
}
}
pool.returnResource(jedis); pool.returnResource(jedis);
return new Request(url); return new Request(url);
} }
......
...@@ -6,6 +6,11 @@ import org.openqa.selenium.By; ...@@ -6,6 +6,11 @@ import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement; import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.remote.DesiredCapabilities;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
/** /**
* @author yihua.huang@dianping.com <br> * @author yihua.huang@dianping.com <br>
...@@ -18,7 +23,16 @@ public class SeleniumTest { ...@@ -18,7 +23,16 @@ public class SeleniumTest {
@Test @Test
public void testSelenium() { public void testSelenium() {
System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver"); System.getProperties().setProperty("webdriver.chrome.driver", "/Users/yihua/Downloads/chromedriver");
WebDriver webDriver = new ChromeDriver(); Map<String, Object> contentSettings = new HashMap<String, Object>();
contentSettings.put("images", 2);
Map<String, Object> preferences = new HashMap<String, Object>();
preferences.put("profile.default_content_settings", contentSettings);
DesiredCapabilities caps = DesiredCapabilities.chrome();
caps.setCapability("chrome.prefs", preferences);
caps.setCapability("chrome.switches", Arrays.asList("--user-data-dir=/Users/yihua/temp/chrome"));
WebDriver webDriver = new ChromeDriver(caps);
webDriver.get("http://huaban.com/"); webDriver.get("http://huaban.com/");
WebElement webElement = webDriver.findElement(By.xpath("/html")); WebElement webElement = webDriver.findElement(By.xpath("/html"));
System.out.println(webElement.getAttribute("outerHTML")); System.out.println(webElement.getAttribute("outerHTML"));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment