Commit 49a4ad66 authored by yihua.huang

add uuid to spider

parent 6428e205
...@@ -12,11 +12,6 @@ public class Site { ...@@ -12,11 +12,6 @@ public class Site {
private String domain; private String domain;
/**
* used to identify a task
*/
private String identifier;
private String userAgent; private String userAgent;
private Map<String, String> cookies = new LinkedHashMap<String, String>(); private Map<String, String> cookies = new LinkedHashMap<String, String>();
...@@ -66,15 +61,6 @@ public class Site { ...@@ -66,15 +61,6 @@ public class Site {
return this; return this;
} }
public String getIdentifier() {
return identifier;
}
public Site setIdentifier(String identifier) {
this.identifier = identifier;
return this;
}
public String getEncoding() { public String getEncoding() {
return encoding; return encoding;
} }
...@@ -97,7 +83,7 @@ public class Site { ...@@ -97,7 +83,7 @@ public class Site {
return startUrls; return startUrls;
} }
public Site setStartUrl(String startUrl) { public Site addStartUrl(String startUrl) {
this.startUrls.add(startUrl); this.startUrls.add(startUrl);
return this; return this;
} }
......
...@@ -18,7 +18,7 @@ import java.util.List; ...@@ -18,7 +18,7 @@ import java.util.List;
* Date: 13-4-21 * Date: 13-4-21
* Time: 上午6:53 * Time: 上午6:53
*/ */
public class Spider implements Runnable { public class Spider implements Runnable, Task {
private Downloader downloader = new HttpClientDownloader(); private Downloader downloader = new HttpClientDownloader();
...@@ -26,6 +26,12 @@ public class Spider implements Runnable { ...@@ -26,6 +26,12 @@ public class Spider implements Runnable {
private PageProcessor pageProcessor; private PageProcessor pageProcessor;
private List<String> startUrls;
private Site site;
private String uuid;
private Schedular schedular = new QueueSchedular(); private Schedular schedular = new QueueSchedular();
private Logger logger = Logger.getLogger(getClass()); private Logger logger = Logger.getLogger(getClass());
...@@ -36,9 +42,18 @@ public class Spider implements Runnable { ...@@ -36,9 +42,18 @@ public class Spider implements Runnable {
public Spider processor(PageProcessor pageProcessor) { public Spider processor(PageProcessor pageProcessor) {
this.pageProcessor = pageProcessor; this.pageProcessor = pageProcessor;
for (String startUrl : pageProcessor.getSite().getStartUrls()) { this.site = pageProcessor.getSite();
schedular.push(new Request(startUrl), pageProcessor.getSite()); return this;
} }
/**
 * Replaces the start URL list held by this spider with the given list.
 * The list reference is stored as-is; no copy is made.
 *
 * NOTE(review): the run() method shown alongside this change pushes
 * pageProcessor.getSite().getStartUrls() to the scheduler - confirm where
 * this field is actually consumed.
 *
 * @param startUrls the start URLs to store
 * @return this spider, to allow call chaining
 */
public Spider startUrls(List<String> startUrls) {
    this.startUrls = startUrls;
    return this;
}
public Spider startUrl(String startUrl) {
startUrls = new ArrayList<String>();
startUrls.add(startUrl);
return this; return this;
} }
...@@ -59,13 +74,15 @@ public class Spider implements Runnable { ...@@ -59,13 +74,15 @@ public class Spider implements Runnable {
@Override @Override
public void run() { public void run() {
Site site = pageProcessor.getSite(); for (String startUrl : pageProcessor.getSite().getStartUrls()) {
Request request = schedular.poll(site); schedular.push(new Request(startUrl), this);
if (pipelines.isEmpty()){ }
Request request = schedular.poll(this);
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline()); pipelines.add(new ConsolePipeline());
} }
while (request != null) { while (request != null) {
Page page = downloader.download(request,site); Page page = downloader.download(request, site);
if (page == null) { if (page == null) {
sleep(site.getSleepTime()); sleep(site.getSleepTime());
continue; continue;
...@@ -73,13 +90,19 @@ public class Spider implements Runnable { ...@@ -73,13 +90,19 @@ public class Spider implements Runnable {
pageProcessor.process(page); pageProcessor.process(page);
addRequest(page); addRequest(page);
for (Pipeline pipeline : pipelines) { for (Pipeline pipeline : pipelines) {
pipeline.process(page,site); pipeline.process(page, this);
} }
sleep(site.getSleepTime()); sleep(site.getSleepTime());
request = schedular.poll(site); request = schedular.poll(this);
} }
} }
/**
 * Assigns an explicit identifier to this spider.
 *
 * @param uuid the identifier to store
 * @return this spider, to allow call chaining
 */
public Spider setUUID(String uuid) {
    this.uuid = uuid;
    return this;
}
private void sleep(int time) { private void sleep(int time) {
try { try {
Thread.sleep(time); Thread.sleep(time);
...@@ -91,8 +114,19 @@ public class Spider implements Runnable { ...@@ -91,8 +114,19 @@ public class Spider implements Runnable {
private void addRequest(Page page) { private void addRequest(Page page) {
if (CollectionUtils.isNotEmpty(page.getTargetRequests())) { if (CollectionUtils.isNotEmpty(page.getTargetRequests())) {
for (Request request : page.getTargetRequests()) { for (Request request : page.getTargetRequests()) {
schedular.push(request,pageProcessor.getSite()); schedular.push(request, this);
} }
} }
} }
/**
 * Returns the identifier of this task.
 * An explicitly assigned uuid wins; when none is set, the domain of the
 * site is returned instead, and null when there is no site either.
 *
 * @return the uuid, else the site domain, else null
 */
@Override
public String getUUID() {
    if (uuid == null) {
        // No explicit identifier: fall back to the site's domain if a site is known.
        return site == null ? null : site.getDomain();
    }
    return uuid;
}
} }
package us.codecraft.webmagic;
/**
 * Contract for objects that expose a task identifier.
 *
 * Author: code4crafter@gmail.com
 * Date: 13-6-18
 * Time: 2:57 PM
 */
public interface Task {

    /**
     * Returns the identifier of this task.
     *
     * @return the task identifier
     */
    String getUUID();
}
package us.codecraft.webmagic.pipeline; package us.codecraft.webmagic.pipeline;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.selector.Selectable; import us.codecraft.webmagic.selector.Selectable;
import java.util.Map; import java.util.Map;
...@@ -14,7 +14,7 @@ import java.util.Map; ...@@ -14,7 +14,7 @@ import java.util.Map;
public class ConsolePipeline implements Pipeline{ public class ConsolePipeline implements Pipeline{
@Override @Override
public void process(Page page,Site site) { public void process(Page page,Task task) {
System.out.println("get page: "+page.getUrl()); System.out.println("get page: "+page.getUrl());
for (Map.Entry<String, Selectable> entry : page.getFields().entrySet()) { for (Map.Entry<String, Selectable> entry : page.getFields().entrySet()) {
System.out.println(entry.getKey()+":\t"+entry.getValue().toStrings()); System.out.println(entry.getKey()+":\t"+entry.getValue().toStrings());
......
...@@ -2,9 +2,8 @@ package us.codecraft.webmagic.pipeline; ...@@ -2,9 +2,8 @@ package us.codecraft.webmagic.pipeline;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.selector.Selectable; import us.codecraft.webmagic.selector.Selectable;
import us.codecraft.webmagic.utils.UrlUtils;
import java.io.File; import java.io.File;
import java.io.FileWriter; import java.io.FileWriter;
...@@ -30,10 +29,8 @@ public class FilePipeline implements Pipeline { ...@@ -30,10 +29,8 @@ public class FilePipeline implements Pipeline {
} }
@Override @Override
public void process(Page page, Site site) { public void process(Page page, Task task) {
String domain = site.getDomain(); String path = this.path + "/" + task.getUUID() + "/";
domain = UrlUtils.getDomain(domain);
String path = this.path + "" + domain + "#" + site.getIdentifier() + "/";
File file = new File(path); File file = new File(path);
if (!file.exists()) { if (!file.exists()) {
file.mkdirs(); file.mkdirs();
......
package us.codecraft.webmagic.pipeline; package us.codecraft.webmagic.pipeline;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task;
/** /**
* Author: code4crafter@gmail.com * Author: code4crafter@gmail.com
...@@ -10,5 +10,5 @@ import us.codecraft.webmagic.Site; ...@@ -10,5 +10,5 @@ import us.codecraft.webmagic.Site;
*/ */
public interface Pipeline { public interface Pipeline {
public void process(Page page,Site site); public void process(Page page,Task task);
} }
...@@ -20,7 +20,7 @@ public class SimplePageProcessor implements PageProcessor { ...@@ -20,7 +20,7 @@ public class SimplePageProcessor implements PageProcessor {
private Site site; private Site site;
public SimplePageProcessor(String startUrl, String urlPattern) { public SimplePageProcessor(String startUrl, String urlPattern) {
this.site = Site.me().setStartUrl(startUrl). this.site = Site.me().addStartUrl(startUrl).
setDomain(UrlUtils.getDomain(startUrl)).setUserAgent(UA); setDomain(UrlUtils.getDomain(startUrl)).setUserAgent(UA);
this.urlPattern = "("+urlPattern.replace(".","\\.").replace("*","[^\"'#]*")+")"; this.urlPattern = "("+urlPattern.replace(".","\\.").replace("*","[^\"'#]*")+")";
......
...@@ -2,8 +2,8 @@ package us.codecraft.webmagic.schedular; ...@@ -2,8 +2,8 @@ package us.codecraft.webmagic.schedular;
import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.math.NumberUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import java.io.*; import java.io.*;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
...@@ -28,7 +28,7 @@ public class FileCacheQueueSchedular implements Schedular { ...@@ -28,7 +28,7 @@ public class FileCacheQueueSchedular implements Schedular {
private String fileUrlAllName = ".urls.txt"; private String fileUrlAllName = ".urls.txt";
private Site site; private Task task;
private String fileCursor = ".cursor.txt"; private String fileCursor = ".cursor.txt";
...@@ -44,13 +44,13 @@ public class FileCacheQueueSchedular implements Schedular { ...@@ -44,13 +44,13 @@ public class FileCacheQueueSchedular implements Schedular {
private Set<String> urls; private Set<String> urls;
public FileCacheQueueSchedular(Site site) { public FileCacheQueueSchedular(Task task) {
this.site = site; this.task = task;
} }
public FileCacheQueueSchedular(Site site, String filePath) { public FileCacheQueueSchedular(Task task, String filePath) {
this.filePath = filePath; this.filePath = filePath;
this.site = site; this.task = task;
} }
private void flush() { private void flush() {
...@@ -106,7 +106,7 @@ public class FileCacheQueueSchedular implements Schedular { ...@@ -106,7 +106,7 @@ public class FileCacheQueueSchedular implements Schedular {
urls.add(line.trim()); urls.add(line.trim());
lineReaded++; lineReaded++;
if (lineReaded > cursor.get()) { if (lineReaded > cursor.get()) {
queue.add(new Request(line, site)); queue.add(new Request(line));
} }
} }
} }
...@@ -121,11 +121,11 @@ public class FileCacheQueueSchedular implements Schedular { ...@@ -121,11 +121,11 @@ public class FileCacheQueueSchedular implements Schedular {
} }
private String getFileName(String filename) { private String getFileName(String filename) {
return filePath + site.getDomain() + "#" + site.getIdentifier() + filename; return filePath + task.getUUID() + "/" + filename;
} }
@Override @Override
public synchronized void push(Request request, Site site) { public synchronized void push(Request request, Task task) {
if (!inited.get()) { if (!inited.get()) {
init(); init();
} }
...@@ -140,7 +140,7 @@ public class FileCacheQueueSchedular implements Schedular { ...@@ -140,7 +140,7 @@ public class FileCacheQueueSchedular implements Schedular {
} }
@Override @Override
public synchronized Request poll(Site site) { public synchronized Request poll(Task task) {
if (!inited.get()) { if (!inited.get()) {
init(); init();
} }
......
...@@ -2,7 +2,7 @@ package us.codecraft.webmagic.schedular; ...@@ -2,7 +2,7 @@ package us.codecraft.webmagic.schedular;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
...@@ -23,7 +23,7 @@ public class QueueSchedular implements Schedular { ...@@ -23,7 +23,7 @@ public class QueueSchedular implements Schedular {
private Set<String> urls = new HashSet<String>(); private Set<String> urls = new HashSet<String>();
@Override @Override
public synchronized void push(Request request,Site site) { public synchronized void push(Request request,Task task) {
if (logger.isDebugEnabled()){ if (logger.isDebugEnabled()){
logger.debug("push to queue "+request.getUrl()); logger.debug("push to queue "+request.getUrl());
} }
...@@ -34,7 +34,7 @@ public class QueueSchedular implements Schedular { ...@@ -34,7 +34,7 @@ public class QueueSchedular implements Schedular {
} }
@Override @Override
public synchronized Request poll(Site site) { public synchronized Request poll(Task task) {
return queue.poll(); return queue.poll();
} }
} }
package us.codecraft.webmagic.schedular; package us.codecraft.webmagic.schedular;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task;
/** /**
* Author: code4crafter@gmail.com * Author: code4crafter@gmail.com
...@@ -10,8 +10,8 @@ import us.codecraft.webmagic.Site; ...@@ -10,8 +10,8 @@ import us.codecraft.webmagic.Site;
*/ */
public interface Schedular { public interface Schedular {
public void push(Request request,Site site); public void push(Request request,Task task);
public Request poll(Site site); public Request poll(Task task);
} }
...@@ -37,7 +37,7 @@ public class DiandianBlogProcessor implements PageProcessor { ...@@ -37,7 +37,7 @@ public class DiandianBlogProcessor implements PageProcessor {
public Site getSite() { public Site getSite() {
//site定义抽取配置,以及开始url等 //site定义抽取配置,以及开始url等
if (site == null) { if (site == null) {
site = Site.me().setDomain("progressdaily.diandian.com").setStartUrl("http://progressdaily.diandian.com/"). site = Site.me().setDomain("progressdaily.diandian.com").addStartUrl("http://progressdaily.diandian.com/").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
return site; return site;
......
...@@ -27,7 +27,7 @@ public class DianpingBlogProcessor implements PageProcessor { ...@@ -27,7 +27,7 @@ public class DianpingBlogProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("www.dianping.com").setStartUrl("http://www.dianping.com/"). return Site.me().setDomain("www.dianping.com").addStartUrl("http://www.dianping.com/").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -33,7 +33,7 @@ public class DiaoyuwengProcessor implements PageProcessor { ...@@ -33,7 +33,7 @@ public class DiaoyuwengProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
if (site==null){ if (site==null){
site= Site.me().setDomain("www.diaoyuweng.com").setStartUrl("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space"). site= Site.me().setDomain("www.diaoyuweng.com").addStartUrl("http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31").setEncoding("GBK").setSleepTime(500); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31").setEncoding("GBK").setSleepTime(500);
} }
return site; return site;
......
...@@ -23,6 +23,6 @@ public class F58PageProcesser implements PageProcessor { ...@@ -23,6 +23,6 @@ public class F58PageProcesser implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("sh.58.com").setStartUrl("http://sh.58.com/"); //To change body of implemented methods use File | Settings | File Templates. return Site.me().setDomain("sh.58.com").addStartUrl("http://sh.58.com/"); //To change body of implemented methods use File | Settings | File Templates.
} }
} }
...@@ -23,7 +23,7 @@ public class HuxiuProcessor implements PageProcessor { ...@@ -23,7 +23,7 @@ public class HuxiuProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("www.huxiu.com").setStartUrl("http://www.huxiu.com/"). return Site.me().setDomain("www.huxiu.com").addStartUrl("http://www.huxiu.com/").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -21,7 +21,7 @@ public class KaichibaProcessor implements PageProcessor { ...@@ -21,7 +21,7 @@ public class KaichibaProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("kaichiba.com").setStartUrl("http://kaichiba.com/shop/41725781").setEncoding("utf-8"). return Site.me().setDomain("kaichiba.com").addStartUrl("http://kaichiba.com/shop/41725781").setEncoding("utf-8").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -27,7 +27,7 @@ public class MeicanProcessor implements PageProcessor { ...@@ -27,7 +27,7 @@ public class MeicanProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("meican.com").setStartUrl("http://www.meican.com/shanghai/districts").setEncoding("utf-8"). return Site.me().setDomain("meican.com").addStartUrl("http://www.meican.com/shanghai/districts").setEncoding("utf-8").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -22,7 +22,7 @@ public class NjuBBSProcessor implements PageProcessor { ...@@ -22,7 +22,7 @@ public class NjuBBSProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("bbs.nju.edu.cn").setStartUrl("http://bbs.nju.edu.cn/board?board=Pictures"). return Site.me().setDomain("bbs.nju.edu.cn").addStartUrl("http://bbs.nju.edu.cn/board?board=Pictures").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -24,7 +24,7 @@ public class OschinaBlogPageProcesser implements PageProcessor { ...@@ -24,7 +24,7 @@ public class OschinaBlogPageProcesser implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("my.oschina.net").setStartUrl("http://www.oschina.net/"). return Site.me().setDomain("my.oschina.net").addStartUrl("http://www.oschina.net/").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -23,7 +23,7 @@ public class OschinaPageProcesser implements PageProcessor { ...@@ -23,7 +23,7 @@ public class OschinaPageProcesser implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("www.oschina.net").setStartUrl("http://www.oschina.net/"). return Site.me().setDomain("www.oschina.net").addStartUrl("http://www.oschina.net/").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -26,7 +26,7 @@ public class QzoneBlogProcessor implements PageProcessor { ...@@ -26,7 +26,7 @@ public class QzoneBlogProcessor implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("www.diandian.com").setStartUrl("http://17dujingdian.com/"). return Site.me().setDomain("www.diandian.com").addStartUrl("http://17dujingdian.com/").
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
} }
...@@ -26,7 +26,7 @@ public class SinaBlogProcesser implements PageProcessor { ...@@ -26,7 +26,7 @@ public class SinaBlogProcesser implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
if (site==null){ if (site==null){
site = Site.me().setDomain("blog.sina.com.cn").setStartUrl("http://blog.sina.com.cn/flashsword20").setSleepTime(3000). site = Site.me().setDomain("blog.sina.com.cn").addStartUrl("http://blog.sina.com.cn/flashsword20").setSleepTime(3000).
setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
} }
return site; return site;
......
...@@ -23,6 +23,6 @@ public class TianyaPageProcesser implements PageProcessor { ...@@ -23,6 +23,6 @@ public class TianyaPageProcesser implements PageProcessor {
@Override @Override
public Site getSite() { public Site getSite() {
return Site.me().setDomain("http://bbs.tianya.cn/").setStartUrl("http://bbs.tianya.cn/"); //To change body of implemented methods use File | Settings | File Templates. return Site.me().setDomain("http://bbs.tianya.cn/").addStartUrl("http://bbs.tianya.cn/"); //To change body of implemented methods use File | Settings | File Templates.
} }
} }
<item> <item>
<title>${title}</title> <title>${title}</title>
<link>http://127.0.0.1/wordpress/?p=${id}</link> <link>http://127.0.0.1/wordpress/?p=${uuid}</link>
<pubDate>${date}</pubDate> <pubDate>${date}</pubDate>
<dc:creator>admin</dc:creator> <dc:creator>admin</dc:creator>
<guid isPermaLink="false">http://127.0.0.1/wordpress/?p=${id}</guid> <guid isPermaLink="false">http://127.0.0.1/wordpress/?p=${uuid}</guid>
<description></description> <description></description>
<content:encoded><![CDATA[${content}]]></content:encoded> <content:encoded><![CDATA[${content}]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded> <excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>${id}</wp:post_id> <wp:post_id>${uuid}</wp:post_id>
<wp:post_date>${date}</wp:post_date> <wp:post_date>${date}</wp:post_date>
<wp:post_date_gmt>${date}</wp:post_date_gmt> <wp:post_date_gmt>${date}</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status> <wp:comment_status>open</wp:comment_status>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment