Commit 787b9529 authored by yihua.huang's avatar yihua.huang

release notes and docs

parent 1f86ce77
......@@ -28,13 +28,15 @@ Release Notes
}
增加一个Spider.test(url)方法,用于开发爬虫时进行调试。
增加基于redis的分布式支持。
增加XPath2.0语法支持(webmagic-saxon模块)。
增加基于Selenium的浏览器渲染支持,用于抓取动态加载内容(webmagic-selenium模块)。
修复一些已有bug。
修复了不支持https的bug。
补充了文档:[webmagic-0.2.0用户手册](http://code4craft.github.io/webmagic/)
......
......@@ -220,10 +220,18 @@ public class Spider implements Runnable, Task {
}
}
public void test(String url){
/**
 * Runs the crawler against specific URLs, for testing.
 *
 * <p>Calls {@code checkComponent()} once, then wraps each URL in a new
 * {@code Request} and hands it to {@code processRequest}, in argument order.
 * An empty or {@code null} argument list results in no requests.
 *
 * @param urls the URLs to fetch
 */
public void test(String... urls){
    checkComponent();
    // A varargs parameter can be passed as an explicit null array
    // (e.g. test((String[]) null)); treat that the same as "no URLs".
    if (urls == null) {
        return;
    }
    // No length check needed: iterating an empty array is already a no-op.
    for (String url : urls) {
        processRequest(new Request(url));
    }
}
private void processRequest(Request request) {
Page page = downloader.download(request, this);
......
......@@ -8,6 +8,7 @@ import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.util.List;
......@@ -32,12 +33,19 @@ public class GithubRepo implements HasKey {
// Extracted from the "lang" spans inside list items under the
// "repository-lang-stats" div; multi = true is set, presumably so that every
// match is collected into the list (confirm against the ExtractBy contract).
@ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']",multi = true)
private List<String> language;
// NOTE(review): star and fork below are annotated with the identical XPath
// expression, so nothing here distinguishes the two values. Presumably both
// fields end up with the same extracted text; confirm and give each its own
// selector if that is the case.
@ExtractBy("//a[@class='social-count js-social-count']/text()")
private String star;
@ExtractBy("//a[@class='social-count js-social-count']/text()")
private String fork;
// Annotated with ExtractByUrl and no arguments; presumably filled from the
// page URL rather than the page body (confirm).
@ExtractByUrl
private String url;
/**
 * Example entry point: builds an OOSpider for GithubRepo starting at
 * https://github.com/explore, with a JsonFilePageModelPipeline, and runs it
 * on 15 threads.
 *
 * NOTE(review): this text looks like a diff rendering with the +/- markers
 * stripped. The first statement appears to be the pre-change form of the
 * second; as written, both would execute one after the other. Confirm which
 * one is intended to remain.
 *
 * @param args command-line arguments (not read here)
 */
public static void main(String[] args) {
// First run: sleep time 0, no retry setting, no explicit scheduler.
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0),
new JsonFilePageModelPipeline(), GithubRepo.class).thread(15).run();
// Second run: same start URL and pipeline, plus setRetryTimes(3) and a
// FileCacheQueueScheduler constructed with the path "/data/webmagic/cache/".
OOSpider.create(Site.me().addStartUrl("https://github.com/explore").setSleepTime(0).setRetryTimes(3),
new JsonFilePageModelPipeline(), GithubRepo.class)
.scheduler(new FileCacheQueueScheduler("/data/webmagic/cache/")).thread(15).run();
}
@Override
......@@ -64,4 +72,12 @@ public class GithubRepo implements HasKey {
/**
 * Returns the current value of the {@code url} field.
 *
 * @return the stored url, exactly as held by this object
 */
public String getUrl() {
    final String value = this.url;
    return value;
}
/**
 * Returns the current value of the {@code star} field.
 *
 * @return the stored star text, exactly as held by this object
 */
public String getStar() {
    final String value = this.star;
    return value;
}
/**
 * Returns the current value of the {@code fork} field.
 *
 * @return the stored fork text, exactly as held by this object
 */
public String getFork() {
    final String value = this.fork;
    return value;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment