Commit f08ffc34 authored by yihua.huang

rename

parent c5cf0564
...@@ -11,7 +11,7 @@ import java.lang.annotation.Target; ...@@ -11,7 +11,7 @@ import java.lang.annotation.Target;
*/ */
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME) @Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD}) @Target({ElementType.FIELD})
public @interface Fetcher { public @interface ExtractBy {
String value(); String value();
......
...@@ -9,13 +9,13 @@ import java.lang.reflect.Field; ...@@ -9,13 +9,13 @@ import java.lang.reflect.Field;
* @date: 13-8-1 <br> * @date: 13-8-1 <br>
* Time: 下午9:48 <br> * Time: 下午9:48 <br>
*/ */
class FieldFetcher { class FieldExtractor {
private final Field field; private final Field field;
private final Selector selector; private final Selector selector;
FieldFetcher(Field field, Selector selector) { FieldExtractor(Field field, Selector selector) {
this.field = field; this.field = field;
this.selector = selector; this.selector = selector;
} }
......
...@@ -18,36 +18,36 @@ import java.util.regex.Pattern; ...@@ -18,36 +18,36 @@ import java.util.regex.Pattern;
*/ */
public class ObjectPageProcessor implements PageProcessor { public class ObjectPageProcessor implements PageProcessor {
private List<PageModelFetcher> pageModelFetcherList; private List<PageModelExtractor> pageModelExtractorList;
private Site site; private Site site;
private Set<Pattern> targetUrlPatterns; private Set<Pattern> targetUrlPatterns;
public static ObjectPageProcessor create(Site site, Class... clazzs) { public static ObjectPageProcessor create(Site site, Class... clazzs) {
List<PageModelFetcher> pageModelFetcherList = new ArrayList<PageModelFetcher>(); List<PageModelExtractor> pageModelExtractorList = new ArrayList<PageModelExtractor>();
for (Class clazz : clazzs) { for (Class clazz : clazzs) {
PageModelFetcher pageModelFetcher = PageModelFetcher.create(clazz); PageModelExtractor pageModelExtractor = PageModelExtractor.create(clazz);
pageModelFetcherList.add(pageModelFetcher); pageModelExtractorList.add(pageModelExtractor);
} }
ObjectPageProcessor objectPageProcessor = new ObjectPageProcessor(site, pageModelFetcherList); ObjectPageProcessor objectPageProcessor = new ObjectPageProcessor(site, pageModelExtractorList);
return objectPageProcessor; return objectPageProcessor;
} }
private ObjectPageProcessor(Site site, List<PageModelFetcher> pageModelFetcherList) { private ObjectPageProcessor(Site site, List<PageModelExtractor> pageModelExtractorList) {
this.site = site; this.site = site;
this.pageModelFetcherList = pageModelFetcherList; this.pageModelExtractorList = pageModelExtractorList;
targetUrlPatterns = new HashSet<Pattern>(); targetUrlPatterns = new HashSet<Pattern>();
for (PageModelFetcher pageModelFetcher : pageModelFetcherList) { for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
targetUrlPatterns.addAll(pageModelFetcher.getTargetUrlPatterns()); targetUrlPatterns.addAll(pageModelExtractor.getTargetUrlPatterns());
} }
} }
@Override @Override
public void process(Page page) { public void process(Page page) {
for (PageModelFetcher pageModelFetcher : pageModelFetcherList) { for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
Object process = pageModelFetcher.process(page); Object process = pageModelExtractor.process(page);
page.putField(pageModelFetcher.getClazz().getCanonicalName(), process); page.putField(pageModelExtractor.getClazz().getCanonicalName(), process);
} }
for (String link : page.getHtml().links().all()) { for (String link : page.getHtml().links().all()) {
for (Pattern targetUrlPattern : targetUrlPatterns) { for (Pattern targetUrlPattern : targetUrlPatterns) {
......
...@@ -17,30 +17,30 @@ import java.util.regex.Pattern; ...@@ -17,30 +17,30 @@ import java.util.regex.Pattern;
* @date: 13-8-1 <br> * @date: 13-8-1 <br>
* Time: 下午9:33 <br> * Time: 下午9:33 <br>
*/ */
class PageModelFetcher { class PageModelExtractor {
private List<Pattern> targetUrlPatterns; private List<Pattern> targetUrlPatterns;
private Class clazz; private Class clazz;
private List<FieldFetcher> fieldFetchers; private List<FieldExtractor> fieldExtractors;
public static PageModelFetcher create(Class clazz) { public static PageModelExtractor create(Class clazz) {
PageModelFetcher pageModelFetcher = new PageModelFetcher(); PageModelExtractor pageModelExtractor = new PageModelExtractor();
pageModelFetcher.init(clazz); pageModelExtractor.init(clazz);
return pageModelFetcher; return pageModelExtractor;
} }
private void init(Class clazz) { private void init(Class clazz) {
this.clazz = clazz; this.clazz = clazz;
initTargetUrlPatterns(); initTargetUrlPatterns();
fieldFetchers = new ArrayList<FieldFetcher>(); fieldExtractors = new ArrayList<FieldExtractor>();
for (Field field : clazz.getDeclaredFields()) { for (Field field : clazz.getDeclaredFields()) {
field.setAccessible(true); field.setAccessible(true);
Fetcher fetcher = field.getAnnotation(Fetcher.class); ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
String value = fetcher.value(); String value = extractBy.value();
Selector selector; Selector selector;
switch (fetcher.type()) { switch (extractBy.type()) {
case Css: case Css:
selector = new CssSelector(value); selector = new CssSelector(value);
break; break;
...@@ -53,7 +53,7 @@ class PageModelFetcher { ...@@ -53,7 +53,7 @@ class PageModelFetcher {
default: default:
selector = new XpathSelector(value); selector = new XpathSelector(value);
} }
fieldFetchers.add(new FieldFetcher(field, selector)); fieldExtractors.add(new FieldExtractor(field, selector));
} }
} }
...@@ -83,8 +83,8 @@ class PageModelFetcher { ...@@ -83,8 +83,8 @@ class PageModelFetcher {
Object o = null; Object o = null;
try { try {
o = clazz.newInstance(); o = clazz.newInstance();
for (FieldFetcher fieldFetcher : fieldFetchers) { for (FieldExtractor fieldExtractor : fieldExtractors) {
fieldFetcher.getField().set(o, fieldFetcher.getSelector().select(page.getHtml().toString())); fieldExtractor.getField().set(o, fieldExtractor.getSelector().select(page.getHtml().toString()));
} }
} catch (InstantiationException e) { } catch (InstantiationException e) {
e.printStackTrace(); e.printStackTrace();
......
...@@ -8,10 +8,10 @@ package us.codecraft.webmagic.annotation; ...@@ -8,10 +8,10 @@ package us.codecraft.webmagic.annotation;
@TargetUrl("http://my.oschina.net/flashsword/blog/*") @TargetUrl("http://my.oschina.net/flashsword/blog/*")
public class Blog { public class Blog {
@Fetcher("//title") @ExtractBy("//title")
private String title; private String title;
@Fetcher(value = "div.BlogContent",type = Fetcher.Type.Css) @ExtractBy(value = "div.BlogContent",type = ExtractBy.Type.Css)
private String content; private String content;
@Override @Override
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment