Commit 6cc1d62a authored by yihua.huang

bugfix: rawhtml does not work

parent a994b1c9
...@@ -4,6 +4,7 @@ import org.apache.commons.codec.digest.DigestUtils; ...@@ -4,6 +4,7 @@ import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import us.codecraft.webmagic.*; import us.codecraft.webmagic.*;
import us.codecraft.webmagic.model.annotation.Experimental;
import us.codecraft.webmagic.pipeline.Pipeline; import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.processor.SimplePageProcessor; import us.codecraft.webmagic.processor.SimplePageProcessor;
...@@ -20,6 +21,7 @@ import java.io.*; ...@@ -20,6 +21,7 @@ import java.io.*;
* @author code4crafter@gmail.com * @author code4crafter@gmail.com
* @since 0.2.1 * @since 0.2.1
*/ */
@Experimental
public class FileCache extends FilePersistentBase implements Downloader, Pipeline, PageProcessor { public class FileCache extends FilePersistentBase implements Downloader, Pipeline, PageProcessor {
private Downloader downloaderWhenFileMiss; private Downloader downloaderWhenFileMiss;
......
...@@ -23,6 +23,12 @@ import us.codecraft.webmagic.processor.PageProcessor; ...@@ -23,6 +23,12 @@ import us.codecraft.webmagic.processor.PageProcessor;
* private List<String> tags; * private List<String> tags;
* } * }
</pre> </pre>
* And start the spider with:
* <pre>
* OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog")
* ,new JsonFilePageModelPipeline(), OschinaBlog.class).run();
</pre>
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* @since 0.2.0 * @since 0.2.0
*/ */
......
...@@ -105,7 +105,8 @@ class PageModelExtractor { ...@@ -105,7 +105,8 @@ class PageModelExtractor {
default: default:
selector = new AndSelector(ExtractorUtils.getSelectors(extractBies)); selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
} }
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, comboExtract.notNull(), comboExtract.multi()); fieldExtractor = new FieldExtractor(field, selector, comboExtract.source() == ComboExtract.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
comboExtract.notNull(), comboExtract.multi());
Method setterMethod = getSetterMethod(clazz, field); Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) { if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod); fieldExtractor.setSetterMethod(setterMethod);
...@@ -119,7 +120,8 @@ class PageModelExtractor { ...@@ -119,7 +120,8 @@ class PageModelExtractor {
ExtractBy extractBy = field.getAnnotation(ExtractBy.class); ExtractBy extractBy = field.getAnnotation(ExtractBy.class);
if (extractBy != null) { if (extractBy != null) {
Selector selector = ExtractorUtils.getSelector(extractBy); Selector selector = ExtractorUtils.getSelector(extractBy);
fieldExtractor = new FieldExtractor(field, selector, FieldExtractor.Source.Html, extractBy.notNull(), extractBy.multi()); fieldExtractor = new FieldExtractor(field, selector, extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
extractBy.notNull(), extractBy.multi());
Method setterMethod = getSetterMethod(clazz, field); Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) { if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod); fieldExtractor.setSetterMethod(setterMethod);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment