Commit c2d6d495 authored by yihua.huang's avatar yihua.huang

#41 add getThreadAlive(),getStatus,getPageCount() to spider

parent cf62d707
......@@ -6,9 +6,9 @@ import org.apache.log4j.Logger;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.scheduler.Scheduler;
......@@ -18,12 +18,10 @@ import us.codecraft.webmagic.utils.UrlUtils;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
......@@ -100,6 +98,8 @@ public class Spider implements Runnable, Task {
// Counter returned by getThreadAlive(); decremented in the finally block of a download task.
private final AtomicInteger threadAlive = new AtomicInteger(0);
// Counter returned by getPageCount(); incremented in that same finally block.
// NOTE(review): the finally block follows an error-logging branch, so this presumably also counts failed downloads — confirm.
private final AtomicLong pageCount = new AtomicLong(0);
/**
* create a spider with pageProcessor.
*
......@@ -306,6 +306,7 @@ public class Spider implements Runnable, Task {
logger.error("download " + requestFinal + " error", e);
} finally {
threadAlive.decrementAndGet();
pageCount.incrementAndGet();
signalNewUrl();
}
}
......@@ -566,6 +567,61 @@ public class Spider implements Runnable, Task {
return spawnUrl;
}
/**
* Get the number of pages downloaded by the spider.
*
* @return total downloaded page count
* @since 0.4.1
*/
public long getPageCount() {
    // Read the current value of the atomic page counter.
    final long currentCount = pageCount.get();
    return currentCount;
}
/**
* Get the running status of the spider.
*
* @return running status
* @see Status
* @since 0.4.1
*/
public Status getStatus() {
    // Fetch the raw integer state first, then translate it to its enum form.
    final int rawState = stat.get();
    return Status.fromValue(rawState);
}
/**
 * Running status of a spider. Each constant is paired with an integer code,
 * which {@link #fromValue(int)} maps back to the constant.
 */
public enum Status {
    Init(0), Running(1), Stopped(2);

    /** Integer code of this status; immutable because enum constants are shared. */
    private final int value;

    Status(int value) {
        this.value = value;
    }

    /**
     * @return the integer code paired with this status
     */
    int getValue() {
        return value;
    }

    /**
     * Look up the status that carries the given integer code.
     *
     * @param value integer code to look up
     * @return the matching status, or {@link #Init} when no constant carries that code
     */
    public static Status fromValue(int value) {
        for (Status status : Status.values()) {
            if (status.getValue() == value) {
                return status;
            }
        }
        // Unknown codes deliberately fall back to the initial status instead of failing.
        return Init;
    }
}
/**
* Get the number of threads that are currently running.
* @return number of threads currently running
* @since 0.4.1
*/
public int getThreadAlive() {
    // Read the current value of the atomic alive-thread counter.
    final int aliveNow = threadAlive.get();
    return aliveNow;
}
/**
* Whether add urls extracted to download.<br>
* Add urls to download when it is true, and just download seed urls when it is false. <br>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment