Commit af809c4d authored by yihua.huang

update version to 0.5.0-snapshot

parent 6933029e
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
<version>7</version> <version>7</version>
</parent> </parent>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging> <packaging>pom</packaging>
<properties> <properties>
...@@ -76,6 +76,16 @@ ...@@ -76,6 +76,16 @@
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
<version>15.0</version> <version>15.0</version>
</dependency> </dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.6</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.6</version>
</dependency>
<dependency> <dependency>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>xsoup</artifactId> <artifactId>xsoup</artifactId>
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
<version>15.0</version>
</dependency> </dependency>
<dependency> <dependency>
...@@ -37,8 +36,13 @@ ...@@ -37,8 +36,13 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>log4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>log4j</artifactId> <artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency> </dependency>
<dependency> <dependency>
......
...@@ -2,7 +2,8 @@ package us.codecraft.webmagic; ...@@ -2,7 +2,8 @@ package us.codecraft.webmagic;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader; import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.CollectorPipeline; import us.codecraft.webmagic.pipeline.CollectorPipeline;
...@@ -18,7 +19,10 @@ import us.codecraft.webmagic.utils.UrlUtils; ...@@ -18,7 +19,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
...@@ -72,7 +76,7 @@ public class Spider implements Runnable, Task { ...@@ -72,7 +76,7 @@ public class Spider implements Runnable, Task {
protected Scheduler scheduler = new QueueScheduler(); protected Scheduler scheduler = new QueueScheduler();
protected Logger logger = Logger.getLogger(getClass()); protected Logger logger = LoggerFactory.getLogger(getClass());
protected ExecutorService executorService; protected ExecutorService executorService;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
package us.codecraft.webmagic.configurable;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
 * Field-level annotation that carries a single optional string.
 * <p>
 * The annotation is retained at runtime, so it is visible through reflection,
 * and it may only be placed on fields. {@link #value()} is the empty string
 * when no value is given.
 * <p>
 * NOTE(review): presumably the value names the configuration entry that should
 * be injected into the annotated field -- the code consuming this annotation
 * is not visible here, confirm against it.
 *
 * @author yihua.huang@dianping.com
 */
@Target(ElementType.FIELD)
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
public @interface Inject {

    /**
     * @return the string attached to the annotated field; {@code ""} if omitted
     */
    String value() default "";

}
package us.codecraft.webmagic.example;
import java.util.List;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.configurable.Inject;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * Example page processor whose link pattern and XPath expressions are read
 * from {@link Inject}-annotated fields instead of being hard-coded.
 * <p>
 * NOTE(review): nothing in this class assigns the annotated fields, and
 * {@link #main(String[])} instantiates the class directly, so they stay
 * {@code null} unless an external mechanism populates them -- confirm how the
 * injection is expected to happen before running this example.
 *
 * @author code4crafter@gmail.com <br>
 */
public class ConfigurableBlogPageProcesser implements PageProcessor {

    /** Pattern applied to the links found on a page to pick follow-up requests. */
    @Inject("linkRegex")
    private String linkRegex;

    /** XPath expression used to extract the "title" field. */
    @Inject("titleXpath")
    private String titleXpath;

    /**
     * NOTE(review): never read in this class; the "content" field is produced
     * by {@code smartContent()} instead -- confirm whether that is intended.
     */
    @Inject("contentXpath")
    private String contentXpath;

    /** XPath expression used to extract the "tags" field. */
    @Inject("tagsXpath")
    private String tagsXpath;

    private Site site = Site.me().setDomain("my.oschina.net");

    /**
     * @return the site settings created for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Queues the links matching {@code linkRegex} as further requests and
     * stores the "title", "content" and "tags" fields on the page. The page
     * is marked as skipped when no title was stored.
     *
     * @param page the page to extract from
     */
    @Override
    public void process(Page page) {
        List<String> matchedLinks = page.getHtml().links().regex(linkRegex).all();
        page.addTargetRequests(matchedLinks);

        page.putField("title", page.getHtml().xpath(titleXpath).toString());
        boolean titleMissing = page.getResultItems().get("title") == null;
        if (titleMissing) {
            // skip this page
            page.setSkip(true);
        }

        // The remaining fields are still stored when the page was marked as skipped.
        page.putField("content", page.getHtml().smartContent().toString());
        page.putField("tags", page.getHtml().xpath(tagsXpath).all());
    }

    /**
     * Runs the example with two threads, starting from a fixed blog URL.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Spider spider = Spider.create(new ConfigurableBlogPageProcesser());
        spider.addUrl("http://my.oschina.net/flashsword/blog").thread(2).run();
    }
}
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -5,19 +5,18 @@ ...@@ -5,19 +5,18 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>webmagic-panel</artifactId> <artifactId>webmagic-panel</artifactId>
<version>0.4.3-SNAPSHOT</version>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>webmagic-scripts</artifactId> <artifactId>webmagic-scripts</artifactId>
<version>0.4.3-SNAPSHOT</version> <version>${project.version}</version>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -5,20 +5,19 @@ ...@@ -5,20 +5,19 @@
<parent> <parent>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<version>0.4.3-SNAPSHOT</version> <version>0.5.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>webmagic-worker</artifactId> <artifactId>webmagic-worker</artifactId>
<version>0.4.3-SNAPSHOT</version>
<packaging>war</packaging> <packaging>war</packaging>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>us.codecraft</groupId> <groupId>us.codecraft</groupId>
<artifactId>webmagic-scripts</artifactId> <artifactId>webmagic-scripts</artifactId>
<version>0.4.3-SNAPSHOT</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment