Commit 9b773060 authored by yihua.huang

test of ExtractByUrl #586

parent d8bd0637
...@@ -128,7 +128,7 @@ class PageModelExtractor { ...@@ -128,7 +128,7 @@ class PageModelExtractor {
FieldExtractor.Source source = null; FieldExtractor.Source source = null;
switch (extractBy.source()){ switch (extractBy.source()){
case RawText: case RawText:
source = FieldExtractor.Source.RawText; source = FieldExtractor.Source.RawText;
break; break;
case RawHtml: case RawHtml:
...@@ -144,10 +144,7 @@ class PageModelExtractor { ...@@ -144,10 +144,7 @@ class PageModelExtractor {
fieldExtractor = new FieldExtractor(field, selector, source, fieldExtractor = new FieldExtractor(field, selector, source,
extractBy.notNull(), List.class.isAssignableFrom(field.getType())); extractBy.notNull(), List.class.isAssignableFrom(field.getType()));
Method setterMethod = getSetterMethod(clazz, field); fieldExtractor.setSetterMethod(getSetterMethod(clazz, field));
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
}
} }
return fieldExtractor; return fieldExtractor;
} }
......
...@@ -25,22 +25,17 @@ public class ExtractorUtils { ...@@ -25,22 +25,17 @@ public class ExtractorUtils {
selector = new RegexSelector(value); selector = new RegexSelector(value);
break; break;
case XPath: case XPath:
selector = getXpathSelector(value); selector = new XpathSelector(value);
break; break;
case JsonPath: case JsonPath:
selector = new JsonPathSelector(value); selector = new JsonPathSelector(value);
break; break;
default: default:
selector = getXpathSelector(value); selector = new XpathSelector(value);
} }
return selector; return selector;
} }
/**
 * Creates the selector used for XPath-typed expressions.
 *
 * @param value the expression passed to the {@link XpathSelector} constructor
 * @return a new {@link XpathSelector} built from {@code value}
 */
private static Selector getXpathSelector(String value) {
    return new XpathSelector(value);
}
public static List<Selector> getSelectors(ExtractBy[] extractBies) { public static List<Selector> getSelectors(ExtractBy[] extractBies) {
List<Selector> selectors = new ArrayList<Selector>(); List<Selector> selectors = new ArrayList<Selector>();
if (extractBies == null) { if (extractBies == null) {
......
...@@ -4,6 +4,7 @@ import org.apache.commons.lang3.time.DateFormatUtils; ...@@ -4,6 +4,7 @@ import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.lang3.time.DateUtils;
import org.junit.Test; import org.junit.Test;
import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.Formatter; import us.codecraft.webmagic.model.annotation.Formatter;
import us.codecraft.webmagic.model.formatter.DateFormatter; import us.codecraft.webmagic.model.formatter.DateFormatter;
...@@ -74,6 +75,20 @@ public class PageModelExtractorTest { ...@@ -74,6 +75,20 @@ public class PageModelExtractorTest {
} }
/**
 * Test model with a single {@code name} field annotated for extraction with
 * the JsonPath expression {@code $.name} against the raw-text source.
 */
public static class ModelJsonStr {

    @ExtractBy(value = "$.name", type = ExtractBy.Type.JsonPath, source = ExtractBy.Source.RawText)
    private String name;

}
/**
 * Test model whose {@code name} field is annotated with {@link ExtractByUrl}.
 * The pattern matches a GitHub API repository URL and has a single capture
 * group around the last path segment.
 */
public static class ModelUrl {

    @ExtractByUrl(value = "https://api\\.github\\.com/repos/\\w+/(\\w+)")
    private String name;

}
@Test @Test
public void testXpath() throws Exception { public void testXpath() throws Exception {
ModelDateStr modelDate = (ModelDateStr) PageModelExtractor.create(ModelDateStr.class).process(pageMocker.getMockPage()); ModelDateStr modelDate = (ModelDateStr) PageModelExtractor.create(ModelDateStr.class).process(pageMocker.getMockPage());
...@@ -115,4 +130,16 @@ public class PageModelExtractorTest { ...@@ -115,4 +130,16 @@ public class PageModelExtractorTest {
ModelCustomList modelDate = (ModelCustomList) PageModelExtractor.create(ModelCustomList.class).process(pageMocker.getMockPage()); ModelCustomList modelDate = (ModelCustomList) PageModelExtractor.create(ModelCustomList.class).process(pageMocker.getMockPage());
assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd")); assertThat(modelDate.dates).containsExactly(DateUtils.parseDate("20170601", "yyyyMMdd"), DateUtils.parseDate("20170602", "yyyyMMdd"), DateUtils.parseDate("20170603", "yyyyMMdd"), DateUtils.parseDate("20170604", "yyyyMMdd"));
} }
/**
 * Processes the mock JSON page with the extractor built for
 * {@link ModelJsonStr} and checks the extracted {@code name}.
 */
@Test
public void testExtractJson() throws Exception {
    Object extracted = PageModelExtractor.create(ModelJsonStr.class)
            .process(pageMocker.getMockJsonPage());
    ModelJsonStr model = (ModelJsonStr) extracted;
    assertThat(model.name).isEqualTo("webmagic");
}
/**
 * Processes the mock JSON page with the extractor built for
 * {@link ModelUrl} and checks the extracted {@code name}.
 */
@Test
public void testExtractByUrl() throws Exception {
    Object extracted = PageModelExtractor.create(ModelUrl.class)
            .process(pageMocker.getMockJsonPage());
    ModelUrl model = (ModelUrl) extracted;
    assertThat(model.name).isEqualTo("webmagic");
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment