Commit af809c4d authored by yihua.huang

update version to 0.5.0-snapshot

parent 6933029e
......@@ -6,7 +6,7 @@
<version>7</version>
</parent>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<properties>
......@@ -76,6 +76,16 @@
<artifactId>guava</artifactId>
<version>15.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.6</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.6</version>
</dependency>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>xsoup</artifactId>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......@@ -23,7 +23,6 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>15.0</version>
</dependency>
<dependency>
......@@ -37,8 +36,13 @@
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
......
......@@ -2,7 +2,8 @@ package us.codecraft.webmagic;
import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
......@@ -18,7 +19,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
import java.io.Closeable;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
......@@ -72,7 +76,7 @@ public class Spider implements Runnable, Task {
protected Scheduler scheduler = new QueueScheduler();
protected Logger logger = Logger.getLogger(getClass());
protected Logger logger = LoggerFactory.getLogger(getClass());
protected ExecutorService executorService;
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
package us.codecraft.webmagic.configurable;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
 * Field-level marker annotation that carries an optional string name.
 * <p>
 * The annotation is retained at runtime, so it and its {@link #value()} can be read
 * reflectively from the annotated field.
 * <p>
 * NOTE(review): presumably a configuration loader uses {@code value} as the key for the
 * setting to assign to the field; no such consumer is visible here -- confirm.
 *
 * @author yihua.huang@dianping.com
 */
@Target(ElementType.FIELD)
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
public @interface Inject {

    /**
     * Name associated with the annotated field.
     *
     * @return the given name, or an empty string when none is specified
     */
    String value() default "";
}
package us.codecraft.webmagic.example;
import java.util.List;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.configurable.Inject;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * Example {@link PageProcessor} for blog pages on my.oschina.net whose link pattern and
 * XPath expressions are held in fields annotated with {@link Inject} instead of being
 * hard-coded.
 * <p>
 * NOTE(review): nothing in this class assigns the {@code @Inject} fields; presumably they are
 * populated reflectively before {@link #process(Page)} runs -- confirm against the injector.
 *
 * @author code4crafter@gmail.com <br>
 */
public class ConfigurableBlogPageProcesser implements PageProcessor {

    /** Seed URL handed to the spider by {@link #main(String[])}. */
    private static final String START_URL = "http://my.oschina.net/flashsword/blog";

    /** Site settings returned by {@link #getSite()}. */
    private Site site = Site.me().setDomain("my.oschina.net");

    /** Regular expression applied to the page's links to pick the ones to follow. */
    @Inject("linkRegex")
    private String linkRegex;

    /** XPath expression used to extract the "title" field. */
    @Inject("titleXpath")
    private String titleXpath;

    /**
     * XPath expression for the content.
     * NOTE(review): never read in this class -- the "content" field is filled from
     * {@code smartContent()} instead; confirm whether that is intended.
     */
    @Inject("contentXpath")
    private String contentXpath;

    /** XPath expression used to extract the "tags" field. */
    @Inject("tagsXpath")
    private String tagsXpath;

    /**
     * Queues the links matching {@code linkRegex} as further requests, then stores the
     * "title", "content" and "tags" fields on the page. The page is marked as skipped when
     * no title is stored; the remaining fields are still extracted afterwards.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        // Follow-up requests: every link on the page that matches the configured pattern.
        List<String> matchedLinks = page.getHtml().links().regex(linkRegex).all();
        page.addTargetRequests(matchedLinks);

        String title = page.getHtml().xpath(titleXpath).toString();
        page.putField("title", title);
        Object storedTitle = page.getResultItems().get("title");
        if (storedTitle == null) {
            // skip this page
            page.setSkip(true);
        }

        page.putField("content", page.getHtml().smartContent().toString());
        page.putField("tags", page.getHtml().xpath(tagsXpath).all());
    }

    /**
     * @return the site settings held by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Runs a spider with this processor against the seed URL using two threads.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Spider.create(new ConfigurableBlogPageProcesser())
                .addUrl(START_URL)
                .thread(2)
                .run();
    }
}
......@@ -5,7 +5,7 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -5,19 +5,18 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-panel</artifactId>
<version>0.4.3-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-scripts</artifactId>
<version>0.4.3-SNAPSHOT</version>
<version>${project.version}</version>
</dependency>
</dependencies>
......
......@@ -5,7 +5,7 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -5,7 +5,7 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -3,7 +3,7 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -5,7 +5,7 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -5,20 +5,19 @@
<parent>
<artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-worker</artifactId>
<version>0.4.3-SNAPSHOT</version>
<packaging>war</packaging>
<dependencies>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-scripts</artifactId>
<version>0.4.3-SNAPSHOT</version>
<version>${project.version}</version>
</dependency>
<dependency>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment