Commit 7fbe18b8 authored by yihua.huang

implementation of PageMapper #120

parent 5dc9fe95
...@@ -78,4 +78,17 @@ public class GithubRepo implements HasKey { ...@@ -78,4 +78,17 @@ public class GithubRepo implements HasKey {
public int getFork() { public int getFork() {
return fork; return fork;
} }
/**
 * Builds a one-line textual description of this repository.
 *
 * <p>The fields are listed in the order name, author, readme, language, star,
 * fork, url. The name, author, readme and url values are wrapped in single
 * quotes; language, star and fork are written bare.
 *
 * @return the description, opening with "GithubRepo{" and closed by "}"
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("GithubRepo{");
    text.append("name='").append(name).append('\'');
    text.append(", author='").append(author).append('\'');
    text.append(", readme='").append(readme).append('\'');
    text.append(", language=").append(language);
    text.append(", star=").append(star);
    text.append(", fork=").append(fork);
    text.append(", url='").append(url).append('\'');
    text.append('}');
    return text.toString();
}
} }
...@@ -10,9 +10,9 @@ import us.codecraft.webmagic.processor.PageProcessor; ...@@ -10,9 +10,9 @@ import us.codecraft.webmagic.processor.PageProcessor;
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* @since 0.3.2 * @since 0.3.2
*/ */
public class GithubRepoPageProcessor implements PageProcessor { public class GithubRepoPageMapper implements PageProcessor {
private Site site = Site.me().setRetryTimes(3); private Site site = Site.me().setRetryTimes(3).setSleepTime(0);
private PageMapper<GithubRepo> githubRepoPageMapper = new PageMapper<GithubRepo>(GithubRepo.class); private PageMapper<GithubRepo> githubRepoPageMapper = new PageMapper<GithubRepo>(GithubRepo.class);
...@@ -21,7 +21,12 @@ public class GithubRepoPageProcessor implements PageProcessor { ...@@ -21,7 +21,12 @@ public class GithubRepoPageProcessor implements PageProcessor {
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all()); page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+/\\w+)").all());
page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all()); page.addTargetRequests(page.getHtml().links().regex("(https://github\\.com/\\w+)").all());
GithubRepo githubRepo = githubRepoPageMapper.get(page); GithubRepo githubRepo = githubRepoPageMapper.get(page);
page.putField("repo",githubRepo); if (githubRepo == null) {
page.setSkip(true);
} else {
page.putField("repo", githubRepo);
}
} }
@Override @Override
...@@ -30,6 +35,6 @@ public class GithubRepoPageProcessor implements PageProcessor { ...@@ -30,6 +35,6 @@ public class GithubRepoPageProcessor implements PageProcessor {
} }
public static void main(String[] args) { public static void main(String[] args) {
Spider.create(new GithubRepoPageProcessor()).addUrl("https://github.com/code4craft").thread(5).run(); Spider.create(new GithubRepoPageMapper()).addUrl("https://github.com/code4craft").thread(5).run();
} }
} }
\ No newline at end of file
...@@ -2,6 +2,8 @@ package us.codecraft.webmagic.model; ...@@ -2,6 +2,8 @@ package us.codecraft.webmagic.model;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import java.util.List;
/** /**
* @author code4crafter@gmail.com * @author code4crafter@gmail.com
* @since 0.5.2 * @since 0.5.2
...@@ -10,11 +12,18 @@ public class PageMapper<T> { ...@@ -10,11 +12,18 @@ public class PageMapper<T> {
private Class<T> clazz; private Class<T> clazz;
private PageModelExtractor pageModelExtractor;
public PageMapper(Class<T> clazz) { public PageMapper(Class<T> clazz) {
this.clazz = clazz; this.clazz = clazz;
this.pageModelExtractor = PageModelExtractor.create(clazz);
}
/**
 * Extracts a single model object from the given page.
 *
 * <p>Delegates to {@code pageModelExtractor.process(page)} and casts the result
 * to {@code T}. Because {@code T} is a type parameter, the cast is unchecked and
 * is not verified at this point.
 *
 * <p>NOTE(review): {@code getAll(Page)} casts the result of the same
 * {@code process(page)} call to {@code List<T>}; confirm which shape the
 * extractor actually returns for a given model class before relying on either.
 *
 * @param page the page passed to the extractor
 * @return the extractor's result viewed as {@code T}; presumably {@code null}
 *         when nothing was extracted (a caller in this change null-checks the
 *         result) - confirm against PageModelExtractor
 */
public T get(Page page) {
return (T) pageModelExtractor.process(page);
} }
public T get(Page page){ public List<T> getAll(Page page) {
return null; return (List<T>) pageModelExtractor.process(page);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment