Commit a6f8ed54 authored by yihua.huang

complete formatter refactor by ObjectFormatterBuilder #586

parent b1ef61b2
...@@ -5,9 +5,8 @@ import org.slf4j.Logger; ...@@ -5,9 +5,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.annotation.*; import us.codecraft.webmagic.model.annotation.*;
import us.codecraft.webmagic.model.formatter.BasicTypeFormatter;
import us.codecraft.webmagic.model.formatter.ObjectFormatter; import us.codecraft.webmagic.model.formatter.ObjectFormatter;
import us.codecraft.webmagic.model.formatter.ObjectFormatters; import us.codecraft.webmagic.model.formatter.ObjectFormatterBuilder;
import us.codecraft.webmagic.selector.*; import us.codecraft.webmagic.selector.*;
import us.codecraft.webmagic.utils.ClassUtils; import us.codecraft.webmagic.utils.ClassUtils;
import us.codecraft.webmagic.utils.ExtractorUtils; import us.codecraft.webmagic.utils.ExtractorUtils;
...@@ -70,58 +69,12 @@ class PageModelExtractor { ...@@ -70,58 +69,12 @@ class PageModelExtractor {
fieldExtractor = fieldExtractorTmp; fieldExtractor = fieldExtractorTmp;
} }
if (fieldExtractor != null) { if (fieldExtractor != null) {
checkFormat(field, fieldExtractor); fieldExtractor.setObjectFormatter(new ObjectFormatterBuilder().setField(field).build());
fieldExtractors.add(fieldExtractor); fieldExtractors.add(fieldExtractor);
} }
} }
} }
/**
 * Pre-refactor formatter wiring: inspects the {@link Formatter} annotation on
 * {@code field} and, when one applies, installs an {@link ObjectFormatter} on
 * {@code fieldExtractor}.
 *
 * Fields without a {@code @Formatter} annotation are left untouched.
 *
 * @param field          the model field being configured
 * @param fieldExtractor the extractor that receives the formatter
 * @throws IllegalStateException if a multi-valued extractor is bound to a
 *                               field that is not a {@code List}
 */
private void checkFormat(Field field, FieldExtractor fieldExtractor) {
// check for a custom formatter declared on the field
Formatter formatter = field.getAnnotation(Formatter.class);
if (formatter == null) {
// no annotation: nothing to configure
return;
}
// An explicitly declared formatter class takes precedence over type-based lookup.
if (!formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) {
ObjectFormatter objectFormatter = initFormatter(formatter.formatter(), formatter.value());
fieldExtractor.setObjectFormatter(objectFormatter);
return;
}
// Single-valued, non-String field: pick the formatter registered for the field's basic type.
if (!fieldExtractor.isMulti() && !String.class.isAssignableFrom(field.getType())) {
Class<?> fieldClazz = BasicTypeFormatter.detectBasicClass(field.getType());
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(fieldClazz), formatter.value());
// NOTE(review): initFormatter either returns the new instance or throws, so this
// null branch cannot be taken. If ObjectFormatters.get yields null (its
// implementation is not visible here), the failure presumably surfaces as a
// NullPointerException inside initFormatter instead of this message.
if (objectFormatter == null) {
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
} else {
fieldExtractor.setObjectFormatter(objectFormatter);
}
} else if (fieldExtractor.isMulti()) {
// Multi-valued extraction requires a List-typed field.
if (!List.class.isAssignableFrom(field.getType())) {
throw new IllegalStateException("Field " + field.getName() + " must be list");
}
// subClazz == Void.class is treated as "no element type declared": no formatter is set.
if (!formatter.subClazz().equals(Void.class)) {
ObjectFormatter objectFormatter = initFormatter(ObjectFormatters.get(formatter.subClazz()), formatter.value());
// NOTE(review): same as above, this null check cannot fire given initFormatter's contract.
if (objectFormatter == null) {
throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + formatter.subClazz());
} else {
fieldExtractor.setObjectFormatter(objectFormatter);
}
}
}
}
/**
 * Creates a formatter through the class's no-argument constructor and hands
 * it the supplied parameters.
 *
 * @param formatterClazz formatter implementation to instantiate
 * @param params         values passed unchanged to {@code initParam}
 * @return the newly created, initialized formatter
 * @throws RuntimeException wrapping the reflection failure when the class
 *                          cannot be instantiated or accessed
 */
private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz, String[] params) {
    try {
        final ObjectFormatter created = formatterClazz.newInstance();
        created.initParam(params);
        return created;
    } catch (InstantiationException instantiationFailure) {
        throw new RuntimeException(instantiationFailure);
    } catch (IllegalAccessException accessFailure) {
        throw new RuntimeException(accessFailure);
    }
}
private FieldExtractor getAnnotationExtractByUrl(Class clazz, Field field) { private FieldExtractor getAnnotationExtractByUrl(Class clazz, Field field) {
FieldExtractor fieldExtractor = null; FieldExtractor fieldExtractor = null;
ExtractByUrl extractByUrl = field.getAnnotation(ExtractByUrl.class); ExtractByUrl extractByUrl = field.getAnnotation(ExtractByUrl.class);
......
package us.codecraft.webmagic.model.formatter;
import us.codecraft.webmagic.model.annotation.Formatter;
import java.lang.reflect.Field;
import java.util.List;
/**
 * Builds the {@link ObjectFormatter} for a model field.
 *
 * Resolution order used by {@link #build()}:
 * <ol>
 * <li>a formatter class declared explicitly on the field's {@link Formatter}
 * annotation (anything other than {@code Formatter.DEFAULT_FORMATTER});</li>
 * <li>otherwise the formatter registered in {@code ObjectFormatters} for the
 * annotation's {@code subClazz}, when it is not {@code Void.class};</li>
 * <li>otherwise the formatter registered for the field's own type.</li>
 * </ol>
 * {@code String} and {@code List} types need no formatter, so the result is
 * {@code null} for them.
 *
 * Date: 2017/6/3
 *
 * @author code4crafter@gmail.com
 * @since 0.7.0
 */
public class ObjectFormatterBuilder {

    /** Field whose formatter is being built; must be set before {@link #build()}. */
    private Field field;

    /**
     * Sets the field to build a formatter for.
     *
     * @param field the model field
     * @return this builder, for chaining
     */
    public ObjectFormatterBuilder setField(Field field) {
        this.field = field;
        return this;
    }

    /**
     * Looks up and instantiates the formatter registered for a type.
     *
     * @param fieldClazz type to find a formatter for
     * @param params     values passed to the formatter's {@code initParam}
     * @return the initialized formatter, or {@code null} when {@code fieldClazz}
     *         is {@code String} or a {@code List} type
     * @throws IllegalStateException if no formatter is registered for the type
     */
    private ObjectFormatter initFormatterForType(Class<?> fieldClazz, String[] params) {
        if (fieldClazz.equals(String.class) || List.class.isAssignableFrom(fieldClazz)) {
            return null;
        }
        Class<? extends ObjectFormatter> formatterClass =
                ObjectFormatters.get(BasicTypeFormatter.detectBasicClass(fieldClazz));
        if (formatterClass == null) {
            throw new IllegalStateException("Can't find formatter for field " + field.getName() + " of type " + fieldClazz);
        }
        return initFormatter(formatterClass, params);
    }

    /**
     * Instantiates a formatter via its no-argument constructor and initializes it.
     *
     * @param formatterClazz formatter implementation to instantiate
     * @param params         values passed unchanged to {@code initParam}
     * @return the initialized formatter
     * @throws RuntimeException wrapping the reflection failure when the class
     *                          cannot be instantiated or accessed
     */
    private ObjectFormatter initFormatter(Class<? extends ObjectFormatter> formatterClazz, String[] params) {
        try {
            ObjectFormatter objectFormatter = formatterClazz.newInstance();
            objectFormatter.initParam(params);
            return objectFormatter;
        } catch (InstantiationException e) {
            // Keep the cause and name the offending class so the failure is diagnosable.
            throw new RuntimeException("Can't instantiate formatter " + formatterClazz.getName(), e);
        } catch (IllegalAccessException e) {
            throw new RuntimeException("Can't access formatter " + formatterClazz.getName(), e);
        }
    }

    /**
     * Resolves the formatter for the configured field.
     *
     * @return the formatter to apply, or {@code null} when the resolved type is
     *         {@code String} or a {@code List} type
     * @throws IllegalStateException if no field has been set, or if no
     *                               formatter is registered for the resolved type
     */
    public ObjectFormatter build() {
        if (field == null) {
            // Fail with a clear message instead of a bare NullPointerException below.
            throw new IllegalStateException("Field must be set with setField() before calling build()");
        }
        Formatter formatter = field.getAnnotation(Formatter.class);
        if (formatter != null && !formatter.formatter().equals(Formatter.DEFAULT_FORMATTER)) {
            return initFormatter(formatter.formatter(), formatter.value());
        }
        if (formatter == null || formatter.subClazz().equals(Void.class)) {
            // NOTE(review): params is null when the field has no @Formatter annotation;
            // confirm every initParam implementation tolerates a null array.
            return initFormatterForType(field.getType(), formatter != null ? formatter.value() : null);
        } else {
            return initFormatterForType(formatter.subClazz(), formatter.value());
        }
    }
}
...@@ -22,9 +22,9 @@ public class ObjectFormatters { ...@@ -22,9 +22,9 @@ public class ObjectFormatters {
try { try {
formatterMap.put(objectFormatter.newInstance().clazz(), objectFormatter); formatterMap.put(objectFormatter.newInstance().clazz(), objectFormatter);
} catch (InstantiationException e) { } catch (InstantiationException e) {
e.printStackTrace(); throw new RuntimeException(e);
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
e.printStackTrace(); throw new RuntimeException(e);
} }
} }
......
package us.codecraft.webmagic.model; package us.codecraft.webmagic.model;
import org.apache.commons.lang3.time.DateFormatUtils; import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.junit.Test; import org.junit.Test;
import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.Formatter; import us.codecraft.webmagic.model.annotation.Formatter;
...@@ -44,7 +45,7 @@ public class PageModelExtractorTest { ...@@ -44,7 +45,7 @@ public class PageModelExtractorTest {
public static class ModelStringList { public static class ModelStringList {
@ExtractBy("//a/@href") @ExtractBy("//li[@class='list']/a/@href")
private List<String> links; private List<String> links;
} }
...@@ -86,18 +87,18 @@ public class PageModelExtractorTest { ...@@ -86,18 +87,18 @@ public class PageModelExtractorTest {
@Test @Test
public void testExtractList() throws Exception { public void testExtractList() throws Exception {
ModelStringList modelDate = (ModelStringList) PageModelExtractor.create(ModelStringList.class).process(pageMocker.getMockPage()); ModelStringList modelDate = (ModelStringList) PageModelExtractor.create(ModelStringList.class).process(pageMocker.getMockPage());
assertThat(modelDate.links).hasSize(8); assertThat(modelDate.links).containsExactly("http://webmagic.io/list/1","http://webmagic.io/list/2","http://webmagic.io/list/3","http://webmagic.io/list/4");
} }
@Test @Test
public void testExtractIntList() throws Exception { public void testExtractIntList() throws Exception {
ModelIntList modelDate = (ModelIntList) PageModelExtractor.create(ModelIntList.class).process(pageMocker.getMockPage()); ModelIntList modelDate = (ModelIntList) PageModelExtractor.create(ModelIntList.class).process(pageMocker.getMockPage());
assertThat(modelDate.numbers).hasSize(4); assertThat(modelDate.numbers).containsExactly(1,2,3,4);
} }
@Test @Test
public void testExtractDateList() throws Exception { public void testExtractDateList() throws Exception {
ModelDateList modelDate = (ModelDateList) PageModelExtractor.create(ModelDateList.class).process(pageMocker.getMockPage()); ModelDateList modelDate = (ModelDateList) PageModelExtractor.create(ModelDateList.class).process(pageMocker.getMockPage());
assertThat(modelDate.dates).hasSize(4); assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd"));
} }
} }
...@@ -10,14 +10,14 @@ ...@@ -10,14 +10,14 @@
<ul> <ul>
<li class="list"><a href="http://webmagic.io/list/1"></a></li> <li class="list"><a href="http://webmagic.io/list/1"></a></li>
<li class="list"><a href="http://webmagic.io/list/2"></a></li> <li class="list"><a href="http://webmagic.io/list/2"></a></li>
<li class="list"><a href="http://webmagic.io/post/3"></a></li> <li class="list"><a href="http://webmagic.io/list/3"></a></li>
<li class="list"><a href="http://webmagic.io/post/4"></a></li> <li class="list"><a href="http://webmagic.io/list/4"></a></li>
</ul> </ul>
<ul> <ul>
<li class="post"><a href="http://webmagic.io/post/1"></a></li> <li class="post"><a href="http://webmagic.io/post/1"></a></li>
<li class="post"><a href="http://webmagic.io/post/2"></a></li> <li class="post"><a href="http://webmagic.io/post/2"></a></li>
<li class="post"><a href="http://webmagic.io/list/3"></a></li> <li class="post"><a href="http://webmagic.io/post/3"></a></li>
<li class="post"><a href="http://webmagic.io/list/4"></a></li> <li class="post"><a href="http://webmagic.io/post/4"></a></li>
</ul> </ul>
<ul> <ul>
<li class="numbers">1</li> <li class="numbers">1</li>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment