Commit 250cc5e6 authored by yihua.huang

change formatter to class

parent b1821624
package us.codecraft.webmagic.model; package us.codecraft.webmagic.example;
import junit.framework.Assert; import us.codecraft.webmagic.model.HasKey;
import org.junit.Test;
import us.codecraft.webmagic.MockDownloader;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.ExtractByUrl; import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.HelpUrl; import us.codecraft.webmagic.model.annotation.HelpUrl;
...@@ -25,10 +21,10 @@ public class GithubRepo implements HasKey { ...@@ -25,10 +21,10 @@ public class GithubRepo implements HasKey {
@ExtractByUrl("https://github\\.com/(\\w+)/.*") @ExtractByUrl("https://github\\.com/(\\w+)/.*")
private String author; private String author;
@ExtractBy("//div[@id='readme']") @ExtractBy("//div[@id='readme']/tidyText()")
private String readme; private String readme;
@ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']", multi = true) @ExtractBy(value = "//div[@class='repository-lang-stats']//li//span[@class='lang']/text()", multi = true)
private List<String> language; private List<String> language;
@ExtractBy("//ul[@class='pagehead-actions']/li[2]//a[@class='social-count js-social-count']/text()") @ExtractBy("//ul[@class='pagehead-actions']/li[2]//a[@class='social-count js-social-count']/text()")
...@@ -40,18 +36,6 @@ public class GithubRepo implements HasKey { ...@@ -40,18 +36,6 @@ public class GithubRepo implements HasKey {
@ExtractByUrl @ExtractByUrl
private String url; private String url;
@Test
public void test() {
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0)
, new PageModelPipeline<GithubRepo>() {
@Override
public void process(GithubRepo o, Task task) {
Assert.assertEquals(78, o.getStar());
Assert.assertEquals(65, o.getFork());
}
}, GithubRepo.class).setDownloader(new MockDownloader()).test("https://github.com/code4craft/webmagic");
}
@Override @Override
public String key() { public String key() {
return author + ":" + name; return author + ":" + name;
......
...@@ -105,15 +105,15 @@ class PageModelExtractor { ...@@ -105,15 +105,15 @@ class PageModelExtractor {
Formatter formatter = field.getAnnotation(Formatter.class); Formatter formatter = field.getAnnotation(Formatter.class);
if (formatter != null) { if (formatter != null) {
if (!formatter.formatter().equals(ObjectFormatter.class)) { if (!formatter.formatter().equals(ObjectFormatter.class)) {
return initFormatter(formatter); return initFormatter(formatter.formatter());
} }
} }
return ObjectFormatters.get(fieldClazz); return initFormatter(ObjectFormatters.get(fieldClazz));
} }
private ObjectFormatter initFormatter(Formatter formatter) { private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz) {
try { try {
return formatter.formatter().newInstance(); return formatterClazz.newInstance();
} catch (InstantiationException e) { } catch (InstantiationException e) {
logger.error("init ObjectFormatter fail", e); logger.error("init ObjectFormatter fail", e);
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
......
...@@ -25,9 +25,9 @@ public abstract class BasicTypeFormatter<T> implements ObjectFormatter<T> { ...@@ -25,9 +25,9 @@ public abstract class BasicTypeFormatter<T> implements ObjectFormatter<T> {
protected abstract T formatTrimmed(String raw) throws Exception; protected abstract T formatTrimmed(String raw) throws Exception;
public static final List<ObjectFormatter> basicTypeFormatters = Arrays.<ObjectFormatter>asList(new IntegerFormatter(), public static final List<Class<? extends ObjectFormatter>> basicTypeFormatters = Arrays.<Class<? extends ObjectFormatter>>asList(IntegerFormatter.class,
new LongFormatter(), new DoubleFormatter(), new FloatFormatter(), new ShortFormatter(), LongFormatter.class, DoubleFormatter.class, FloatFormatter.class, ShortFormatter.class,
new CharactorFormatter(), new ByteFormatter(), new BooleanFormatter()); CharactorFormatter.class, ByteFormatter.class, BooleanFormatter.class);
public static Class<?> detectBasicClass(Class<?> type) { public static Class<?> detectBasicClass(Class<?> type) {
if (type.equals(Integer.TYPE) || type.equals(Integer.class)) { if (type.equals(Integer.TYPE) || type.equals(Integer.class)) {
......
...@@ -9,19 +9,26 @@ import java.util.concurrent.ConcurrentHashMap; ...@@ -9,19 +9,26 @@ import java.util.concurrent.ConcurrentHashMap;
*/ */
public class ObjectFormatters { public class ObjectFormatters {
private static Map<Class, ObjectFormatter> formatterMap = new ConcurrentHashMap<Class, ObjectFormatter>(); private static Map<Class, Class<? extends ObjectFormatter>> formatterMap = new ConcurrentHashMap<Class, Class<? extends ObjectFormatter>>();
static { static {
for (ObjectFormatter basicTypeFormatter : BasicTypeFormatter.basicTypeFormatters) { for (Class<? extends ObjectFormatter> basicTypeFormatter : BasicTypeFormatter.basicTypeFormatters) {
put(basicTypeFormatter); put(basicTypeFormatter);
} }
put(DateFormatter.class);
} }
public static void put(ObjectFormatter objectFormatter) { public static void put(Class<? extends ObjectFormatter> objectFormatter) {
formatterMap.put(objectFormatter.clazz(), objectFormatter); try {
formatterMap.put(objectFormatter.newInstance().clazz(), objectFormatter);
} catch (InstantiationException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
}
} }
public static <T> ObjectFormatter<T> get(Class<T> clazz){ public static Class<? extends ObjectFormatter> get(Class<?> clazz){
return formatterMap.get(clazz); return formatterMap.get(clazz);
} }
} }
package us.codecraft.webmagic.model;
import junit.framework.Assert;
import org.junit.Test;
import us.codecraft.webmagic.MockDownloader;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.example.GithubRepo;
/**
 * Runs the {@link GithubRepo} page model through {@link OOSpider} with a
 * {@link MockDownloader} installed, and checks the star and fork counts that
 * reach the pipeline.
 *
 * @author code4crafter@gmail.com <br>
 */
public class GithubRepoTest {

    /** Repository page used both as the site's start URL and as the URL handed to {@code test(...)}. */
    private static final String REPO_URL = "https://github.com/code4craft/webmagic";

    /**
     * Builds the spider for {@link GithubRepo}, swaps in the mock downloader and
     * triggers a single test run against {@link #REPO_URL}. The assertions live in
     * the pipeline callback, so they are evaluated for each extracted model the
     * spider passes to it.
     */
    @Test
    public void test() {
        Site site = Site.me().addStartUrl(REPO_URL).setSleepTime(0);

        PageModelPipeline<GithubRepo> starAndForkCheck = new PageModelPipeline<GithubRepo>() {
            @Override
            public void process(GithubRepo repo, Task task) {
                Assert.assertEquals(78, repo.getStar());
                Assert.assertEquals(65, repo.getFork());
            }
        };

        OOSpider.create(site, starAndForkCheck, GithubRepo.class)
                .setDownloader(new MockDownloader())
                .test(REPO_URL);
    }
}
package us.codecraft.webmagic.model.samples; package us.codecraft.webmagic.model.samples;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.HasKey;
import us.codecraft.webmagic.model.OOSpider; import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl; import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline; import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline;
import java.util.Date;
import java.util.List; import java.util.List;
/** /**
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
*/ */
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+") @TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog implements HasKey{ public class OschinaBlog{
@ExtractBy("//title") @ExtractBy("//title")
private String title; private String title;
...@@ -24,16 +24,14 @@ public class OschinaBlog implements HasKey{ ...@@ -24,16 +24,14 @@ public class OschinaBlog implements HasKey{
@ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true) @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
private List<String> tags; private List<String> tags;
@ExtractBy("//div[class='BlogStat']/regex('\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}')")
private Date date;
public static void main(String[] args) { public static void main(String[] args) {
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog") OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
,new JsonFilePageModelPipeline(), OschinaBlog.class).run(); ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
} }
@Override
public String key() {
return title;
}
public String getTitle() { public String getTitle() {
return title; return title;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment