Commit b0af45f4 authored by yihua.huang's avatar yihua.huang

complete redis support

parent f3a29d93
package us.codecraft.webmagic;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
......@@ -26,7 +27,9 @@ import java.util.Map;
* Date: 13-4-21
* Time: 上午11:37
*/
public class Request {
public class Request implements Serializable {
private static final long serialVersionUID = 2062192774891352043L;
private String url;
......@@ -56,17 +59,17 @@ public class Request {
}
public Object getExtra(String key) {
if (extras==null){
if (extras == null) {
return null;
}
return extras.get(key);
}
public Request putExtra(String key,Object value) {
if (extras==null){
public Request putExtra(String key, Object value) {
if (extras == null) {
extras = new HashMap<String, Object>();
}
extras.put(key,value);
extras.put(key, value);
return this;
}
......@@ -91,6 +94,10 @@ public class Request {
return true;
}
public Map<String, Object> getExtras() {
return extras;
}
@Override
public int hashCode() {
return url.hashCode();
......
......@@ -17,6 +17,11 @@
<artifactId>freemarker</artifactId>
<version>2.3.15</version>
</dependency>
<dependency>
<groupId>org.resthub</groupId>
<artifactId>hessian</artifactId>
<version>4.0.8</version>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
......
package us.codecraft.webmagic.scheduler;
import com.caucho.hessian.io.Hessian2Input;
import com.caucho.hessian.io.Hessian2Output;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
* @author yihua.huang@dianping.com <br>
* @date: 13-7-14 <br>
* Time: 下午9:20 <br>
*/
public enum HessianSerializer {
INSTANCE;
public <T> byte[] serialize(T v) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Hessian2Output hessian2Output = new Hessian2Output(baos);
hessian2Output.writeObject(v);
hessian2Output.close();
return baos.toByteArray();
}
@SuppressWarnings("unchecked")
public <T> T deSerialize(byte[] bytes) throws IOException {
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
Hessian2Input hessian2Input = new Hessian2Input(bais);
T t = (T) hessian2Input.readObject();
hessian2Input.close();
return t;
}
}
package us.codecraft.webmagic.scheduler;
import org.apache.commons.codec.digest.DigestUtils;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
......@@ -7,6 +8,8 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.schedular.Scheduler;
import java.io.IOException;
/**
* 使用redis管理url,构建一个分布式的爬虫。<br>
*
......@@ -22,6 +25,8 @@ public class RedisScheduler implements Scheduler {
private static final String SET_PREFIX = "set_";
private static final String ITEM_PREFIX = "item_";
public RedisScheduler(String host) {
pool = new JedisPool(new JedisPoolConfig(), host);
}
......@@ -34,6 +39,15 @@ public class RedisScheduler implements Scheduler {
//使用List保存队列
jedis.rpush(QUEUE_PREFIX + task.getUUID(), request.getUrl());
jedis.zadd(SET_PREFIX + task.getUUID(), request.getPriority(), request.getUrl());
if (request.getExtras() != null) {
String key = ITEM_PREFIX + DigestUtils.shaHex(request.getUrl());
try {
byte[] serialize = HessianSerializer.INSTANCE.serialize(request);
jedis.set(key.getBytes(), serialize);
} catch (IOException e) {
e.printStackTrace();
}
}
}
pool.returnResource(jedis);
}
......@@ -42,8 +56,16 @@ public class RedisScheduler implements Scheduler {
public synchronized Request poll(Task task) {
Jedis jedis = pool.getResource();
String url = jedis.lpop(QUEUE_PREFIX + task.getUUID());
String key = ITEM_PREFIX + DigestUtils.shaHex(url);
byte[] bytes = jedis.get(key.getBytes());
try {
Object o = HessianSerializer.INSTANCE.deSerialize(bytes);
return (Request)o;
} catch (Exception e) {
e.printStackTrace();
}
pool.returnResource(jedis);
if (url==null){
if (url == null) {
return null;
}
return new Request(url);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment