Commit fa33b158 authored by yihua.huang

property loader

parent 362fdd06
...@@ -13,7 +13,7 @@ import java.util.List; ...@@ -13,7 +13,7 @@ import java.util.List;
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* @since 0.4.0 * @since 0.4.0
*/ */
public class BaiduBaikePageProcesser implements PageProcessor { public class BaiduBaikePageProcessor implements PageProcessor {
private Site site = Site.me()//.setHttpProxy(new HttpHost("127.0.0.1",8888)) private Site site = Site.me()//.setHttpProxy(new HttpHost("127.0.0.1",8888))
.setRetryTimes(3).setSleepTime(1000).setUseGzip(true); .setRetryTimes(3).setSleepTime(1000).setUseGzip(true);
...@@ -31,7 +31,7 @@ public class BaiduBaikePageProcesser implements PageProcessor { ...@@ -31,7 +31,7 @@ public class BaiduBaikePageProcesser implements PageProcessor {
public static void main(String[] args) { public static void main(String[] args) {
//single download //single download
Spider spider = Spider.create(new BaiduBaikePageProcesser()).thread(2); Spider spider = Spider.create(new BaiduBaikePageProcessor()).thread(2);
String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8"; String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8";
ResultItems resultItems = spider.<ResultItems>get(String.format(urlTemplate, "水力发电")); ResultItems resultItems = spider.<ResultItems>get(String.format(urlTemplate, "水力发电"));
System.out.println(resultItems); System.out.println(resultItems);
......
...@@ -9,7 +9,7 @@ import us.codecraft.webmagic.processor.PageProcessor; ...@@ -9,7 +9,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* @since 0.3.2 * @since 0.3.2
*/ */
public class GithubRepoPageProcesser implements PageProcessor { public class GithubRepoPageProcessor implements PageProcessor {
private Site site = Site.me().setRetryTimes(3).setSleepTime(100); private Site site = Site.me().setRetryTimes(3).setSleepTime(100);
...@@ -31,6 +31,6 @@ public class GithubRepoPageProcesser implements PageProcessor { ...@@ -31,6 +31,6 @@ public class GithubRepoPageProcesser implements PageProcessor {
} }
public static void main(String[] args) { public static void main(String[] args) {
Spider.create(new GithubRepoPageProcesser()).addUrl("https://github.com/code4craft").thread(5).run(); Spider.create(new GithubRepoPageProcessor()).addUrl("https://github.com/code4craft").thread(5).run();
} }
} }
...@@ -10,7 +10,7 @@ import java.util.List; ...@@ -10,7 +10,7 @@ import java.util.List;
/** /**
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
*/ */
public class OschinaBlogPageProcesser implements PageProcessor { public class OschinaBlogPageProcessor implements PageProcessor {
private Site site = Site.me().setDomain("my.oschina.net"); private Site site = Site.me().setDomain("my.oschina.net");
...@@ -34,6 +34,6 @@ public class OschinaBlogPageProcesser implements PageProcessor { ...@@ -34,6 +34,6 @@ public class OschinaBlogPageProcesser implements PageProcessor {
} }
public static void main(String[] args) { public static void main(String[] args) {
Spider.create(new OschinaBlogPageProcesser()).addUrl("http://my.oschina.net/flashsword/blog").thread(2).run(); Spider.create(new OschinaBlogPageProcessor()).addUrl("http://my.oschina.net/flashsword/blog").thread(2).run();
} }
} }
package us.codecraft.webmagic.configurable;
import us.codecraft.webmagic.processor.PageProcessor;
import java.util.Map;
/**
 * Contract for producing an object of type {@code T} from a map of string properties.
 *
 * <p>Only the method signatures are declared here; how properties are mapped onto the
 * result is left entirely to implementations.
 *
 * @param <T> the type of object returned by {@link #load(Map)}
 * @author yihua.huang@dianping.com
 */
public interface PropertyLoader<T> {
/**
 * Supplies a class to this loader.
 *
 * <p>NOTE(review): presumably this is the class that {@link #load(Map)} instantiates or
 * inspects -- confirm against implementations; nothing in this interface ties it to
 * {@code T}.
 *
 * @param clazz the class to hand to the loader
 * @return a {@code PropertyLoader<T>}, which lets calls be chained; whether it is this
 *         same instance is not specified by the interface
 */
PropertyLoader<T> clazz(Class<?> clazz);
/**
 * Produces a {@code T} from the given properties.
 *
 * @param properties string keys mapped to string values to load from
 * @return the loaded object
 */
T load(Map<String, String> properties);
}
...@@ -10,7 +10,7 @@ import us.codecraft.webmagic.processor.PageProcessor; ...@@ -10,7 +10,7 @@ import us.codecraft.webmagic.processor.PageProcessor;
/** /**
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
*/ */
public class ConfigurableBlogPageProcesser implements PageProcessor { public class ConfigurableBlogPageProcessor implements PageProcessor {
private Site site = Site.me().setDomain("my.oschina.net"); private Site site = Site.me().setDomain("my.oschina.net");
...@@ -46,6 +46,6 @@ public class ConfigurableBlogPageProcesser implements PageProcessor { ...@@ -46,6 +46,6 @@ public class ConfigurableBlogPageProcesser implements PageProcessor {
} }
public static void main(String[] args) { public static void main(String[] args) {
Spider.create(new ConfigurableBlogPageProcesser()).addUrl("http://my.oschina.net/flashsword/blog").thread(2).run(); Spider.create(new ConfigurableBlogPageProcessor()).addUrl("http://my.oschina.net/flashsword/blog").thread(2).run();
} }
} }
...@@ -769,7 +769,7 @@ public class MockGithubDownloader implements Downloader{ ...@@ -769,7 +769,7 @@ public class MockGithubDownloader implements Downloader{
"\n" + "\n" +
"<p>Write a class implements PageProcessor:</p>\n" + "<p>Write a class implements PageProcessor:</p>\n" +
"\n" + "\n" +
"<div class=\"highlight highlight-java\"><pre> <span class=\"kd\">public</span> <span class=\"kd\">class</span> <span class=\"nc\">OschinaBlogPageProcesser</span> <span class=\"kd\">implements</span> <span class=\"n\">PageProcessor</span> <span class=\"o\">{</span>\n" + "<div class=\"highlight highlight-java\"><pre> <span class=\"kd\">public</span> <span class=\"kd\">class</span> <span class=\"nc\">OschinaBlogPageProcessor</span> <span class=\"kd\">implements</span> <span class=\"n\">PageProcessor</span> <span class=\"o\">{</span>\n" +
"\n" + "\n" +
" <span class=\"kd\">private</span> <span class=\"n\">Site</span> <span class=\"n\">site</span> <span class=\"o\">=</span> <span class=\"n\">Site</span><span class=\"o\">.</span><span class=\"na\">me</span><span class=\"o\">().</span><span class=\"na\">setDomain</span><span class=\"o\">(</span><span class=\"s\">\"my.oschina.net\"</span><span class=\"o\">)</span>\n" + " <span class=\"kd\">private</span> <span class=\"n\">Site</span> <span class=\"n\">site</span> <span class=\"o\">=</span> <span class=\"n\">Site</span><span class=\"o\">.</span><span class=\"na\">me</span><span class=\"o\">().</span><span class=\"na\">setDomain</span><span class=\"o\">(</span><span class=\"s\">\"my.oschina.net\"</span><span class=\"o\">)</span>\n" +
" <span class=\"o\">.</span><span class=\"na\">addStartUrl</span><span class=\"o\">(</span><span class=\"s\">\"http://my.oschina.net/flashsword/blog\"</span><span class=\"o\">);</span>\n" + " <span class=\"o\">.</span><span class=\"na\">addStartUrl</span><span class=\"o\">(</span><span class=\"s\">\"http://my.oschina.net/flashsword/blog\"</span><span class=\"o\">);</span>\n" +
...@@ -790,7 +790,7 @@ public class MockGithubDownloader implements Downloader{ ...@@ -790,7 +790,7 @@ public class MockGithubDownloader implements Downloader{
" <span class=\"o\">}</span>\n" + " <span class=\"o\">}</span>\n" +
"\n" + "\n" +
" <span class=\"kd\">public</span> <span class=\"kd\">static</span> <span class=\"kt\">void</span> <span class=\"nf\">main</span><span class=\"o\">(</span><span class=\"n\">String</span><span class=\"o\">[]</span> <span class=\"n\">args</span><span class=\"o\">)</span> <span class=\"o\">{</span>\n" + " <span class=\"kd\">public</span> <span class=\"kd\">static</span> <span class=\"kt\">void</span> <span class=\"nf\">main</span><span class=\"o\">(</span><span class=\"n\">String</span><span class=\"o\">[]</span> <span class=\"n\">args</span><span class=\"o\">)</span> <span class=\"o\">{</span>\n" +
" <span class=\"n\">Spider</span><span class=\"o\">.</span><span class=\"na\">create</span><span class=\"o\">(</span><span class=\"k\">new</span> <span class=\"n\">OschinaBlogPageProcesser</span><span class=\"o\">())</span>\n" + " <span class=\"n\">Spider</span><span class=\"o\">.</span><span class=\"na\">create</span><span class=\"o\">(</span><span class=\"k\">new</span> <span class=\"n\">OschinaBlogPageProcessor</span><span class=\"o\">())</span>\n" +
" <span class=\"o\">.</span><span class=\"na\">pipeline</span><span class=\"o\">(</span><span class=\"k\">new</span> <span class=\"n\">ConsolePipeline</span><span class=\"o\">()).</span><span class=\"na\">run</span><span class=\"o\">();</span>\n" + " <span class=\"o\">.</span><span class=\"na\">pipeline</span><span class=\"o\">(</span><span class=\"k\">new</span> <span class=\"n\">ConsolePipeline</span><span class=\"o\">()).</span><span class=\"na\">run</span><span class=\"o\">();</span>\n" +
" <span class=\"o\">}</span>\n" + " <span class=\"o\">}</span>\n" +
" <span class=\"o\">}</span>\n" + " <span class=\"o\">}</span>\n" +
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment