Commit c2d6d495 authored by yihua.huang's avatar yihua.huang

#41 add getThreadAlive(), getStatus(), getPageCount() to Spider

parent cf62d707
...@@ -6,9 +6,9 @@ import org.apache.log4j.Logger; ...@@ -6,9 +6,9 @@ import org.apache.log4j.Logger;
import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader; import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.CollectorPipeline; import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline; import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.QueueScheduler; import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.scheduler.Scheduler; import us.codecraft.webmagic.scheduler.Scheduler;
...@@ -18,12 +18,10 @@ import us.codecraft.webmagic.utils.UrlUtils; ...@@ -18,12 +18,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
...@@ -100,6 +98,8 @@ public class Spider implements Runnable, Task { ...@@ -100,6 +98,8 @@ public class Spider implements Runnable, Task {
private final AtomicInteger threadAlive = new AtomicInteger(0); private final AtomicInteger threadAlive = new AtomicInteger(0);
private final AtomicLong pageCount = new AtomicLong(0);
/** /**
* create a spider with pageProcessor. * create a spider with pageProcessor.
* *
...@@ -306,6 +306,7 @@ public class Spider implements Runnable, Task { ...@@ -306,6 +306,7 @@ public class Spider implements Runnable, Task {
logger.error("download " + requestFinal + " error", e); logger.error("download " + requestFinal + " error", e);
} finally { } finally {
threadAlive.decrementAndGet(); threadAlive.decrementAndGet();
pageCount.incrementAndGet();
signalNewUrl(); signalNewUrl();
} }
} }
...@@ -566,6 +567,61 @@ public class Spider implements Runnable, Task { ...@@ -566,6 +567,61 @@ public class Spider implements Runnable, Task {
return spawnUrl; return spawnUrl;
} }
/**
 * Reads the current value of this spider's page counter.
 * <p>
 * NOTE(review): the only increment visible in this file sits in a
 * {@code finally} clause next to the download-error logging, so requests
 * whose download failed appear to be counted as well -- confirm before
 * treating this as a "successful pages" metric.
 *
 * @return the value of the page counter at the time of the call
 * @since 0.4.1
 */
public long getPageCount() {
    final long pagesSoFar = pageCount.get();
    return pagesSoFar;
}
/**
 * Reports the spider's running status as a {@link Status} constant.
 * <p>
 * The raw integer held in {@code stat} is translated through
 * {@link Status#fromValue(int)}.
 *
 * @return the {@link Status} matching the current raw state value
 * @see Status
 * @since 0.4.1
 */
public Status getStatus() {
    final int rawState = stat.get();
    return Status.fromValue(rawState);
}
/**
 * Running status of a spider. Each constant is paired with a fixed integer
 * code so a status can be recovered from a raw {@code int} via
 * {@link #fromValue(int)}.
 */
public enum Status {
    /** Code 0; also the fallback returned by {@link #fromValue(int)} for unknown codes. */
    Init(0),
    /** Code 1. */
    Running(1),
    /** Code 2. */
    Stopped(2);

    /**
     * Integer code of this constant. Declared {@code final} because enum
     * constants are shared singletons and the code must never change.
     */
    private final int value;

    /**
     * @param value integer code associated with the constant
     */
    Status(int value) {
        this.value = value;
    }

    /**
     * @return the integer code associated with this constant
     */
    int getValue() {
        return value;
    }

    /**
     * Looks up the constant whose code equals {@code value}.
     *
     * @param value integer code to translate
     * @return the matching constant, or {@link #Init} when no constant
     *         carries that code
     */
    public static Status fromValue(int value) {
        for (Status candidate : values()) {
            if (candidate.value == value) {
                return candidate;
            }
        }
        // Unknown codes fall back to Init rather than failing.
        return Init;
    }
}
/**
 * Reads the current value of the {@code threadAlive} counter.
 * <p>
 * NOTE(review): presumably the number of worker threads busy with a
 * request at the time of the call; only the decrement is visible in this
 * part of the file -- confirm against the matching increment.
 *
 * @return the value of the alive-thread counter at the time of the call
 * @since 0.4.1
 */
public int getThreadAlive() {
    final int aliveNow = threadAlive.get();
    return aliveNow;
}
/** /**
* Whether add urls extracted to download.<br> * Whether add urls extracted to download.<br>
* Add urls to download when it is true, and just download seed urls when it is false. <br> * Add urls to download when it is true, and just download seed urls when it is false. <br>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment