Commit b4fcf411 authored by yihua.huang's avatar yihua.huang

add exit when complete option

parent 35288787
...@@ -18,6 +18,8 @@ import java.util.ArrayList; ...@@ -18,6 +18,8 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/** /**
* Entrance of a crawler.<br> * Entrance of a crawler.<br>
...@@ -74,7 +76,7 @@ public class Spider implements Runnable, Task { ...@@ -74,7 +76,7 @@ public class Spider implements Runnable, Task {
protected AtomicInteger stat = new AtomicInteger(STAT_INIT); protected AtomicInteger stat = new AtomicInteger(STAT_INIT);
protected boolean exitWhenComplete = false; protected boolean exitWhenComplete = true;
protected final static int STAT_INIT = 0; protected final static int STAT_INIT = 0;
...@@ -82,6 +84,10 @@ public class Spider implements Runnable, Task { ...@@ -82,6 +84,10 @@ public class Spider implements Runnable, Task {
protected final static int STAT_STOPPED = 2; protected final static int STAT_STOPPED = 2;
private ReentrantLock newUrlLock = new ReentrantLock();
private Condition newUrlCondition = newUrlLock.newCondition();
/** /**
* create a spider with pageProcessor. * create a spider with pageProcessor.
* *
...@@ -245,12 +251,16 @@ public class Spider implements Runnable, Task { ...@@ -245,12 +251,16 @@ public class Spider implements Runnable, Task {
if (threadAlive.get() == 0 && exitWhenComplete) { if (threadAlive.get() == 0 && exitWhenComplete) {
break; break;
} }
// when no request found but some thread is alive, sleep a // wait until new url added
// while. try {
newUrlLock.lock();
try { try {
Thread.sleep(100); newUrlCondition.await();
} catch (InterruptedException e) { } catch (InterruptedException e) {
} }
} finally {
newUrlLock.unlock();
}
} else { } else {
final Request requestFinal = request; final Request requestFinal = request;
threadAlive.incrementAndGet(); threadAlive.incrementAndGet();
...@@ -263,6 +273,7 @@ public class Spider implements Runnable, Task { ...@@ -263,6 +273,7 @@ public class Spider implements Runnable, Task {
logger.error("download " + requestFinal + " error", e); logger.error("download " + requestFinal + " error", e);
} finally { } finally {
threadAlive.decrementAndGet(); threadAlive.decrementAndGet();
signalNewUrl();
} }
} }
}); });
...@@ -351,11 +362,16 @@ public class Spider implements Runnable, Task { ...@@ -351,11 +362,16 @@ public class Spider implements Runnable, Task {
protected void addRequest(Page page) { protected void addRequest(Page page) {
if (CollectionUtils.isNotEmpty(page.getTargetRequests())) { if (CollectionUtils.isNotEmpty(page.getTargetRequests())) {
for (Request request : page.getTargetRequests()) { for (Request request : page.getTargetRequests()) {
scheduler.push(request, this); addRequest(request);
} }
} }
} }
/**
 * Hands a single request to the scheduler on behalf of this spider.
 *
 * @param request the request to push
 */
private void addRequest(Request request) {
    this.scheduler.push(request, this);
}
protected void checkIfRunning() { protected void checkIfRunning() {
if (stat.get() == STAT_RUNNING) { if (stat.get() == STAT_RUNNING) {
throw new IllegalStateException("Spider is already running!"); throw new IllegalStateException("Spider is already running!");
...@@ -368,6 +384,29 @@ public class Spider implements Runnable, Task { ...@@ -368,6 +384,29 @@ public class Spider implements Runnable, Task {
thread.start(); thread.start();
} }
/**
 * Wraps each given url in a {@code Request}, queues it through
 * {@code addRequest}, and then signals the new-url condition once.
 *
 * @param urls the urls to queue for crawling
 * @return this spider, so calls can be chained
 */
public Spider addUrl(String... urls) {
    for (int i = 0; i < urls.length; i++) {
        addRequest(new Request(urls[i]));
    }
    // Signal once after the whole batch rather than once per url.
    signalNewUrl();
    return this;
}
/**
 * Wakes every thread currently waiting on {@code newUrlCondition}.
 * <p>
 * The condition belongs to {@code newUrlLock}, so the lock must be held
 * while signalling.
 */
private void signalNewUrl() {
    // Acquire the lock BEFORE entering the try block: if lock() itself fails,
    // the finally clause must not call unlock() on a lock this thread does not
    // hold (that would throw IllegalMonitorStateException and hide the real error).
    newUrlLock.lock();
    try {
        newUrlCondition.signalAll();
    } finally {
        newUrlLock.unlock();
    }
}
public void start() { public void start() {
runAsync(); runAsync();
} }
......
...@@ -34,6 +34,6 @@ public class OschinaBlogPageProcesser implements PageProcessor { ...@@ -34,6 +34,6 @@ public class OschinaBlogPageProcesser implements PageProcessor {
} }
public static void main(String[] args) { public static void main(String[] args) {
Spider.create(new OschinaBlogPageProcesser()).thread(2).run(); Spider.create(new OschinaBlogPageProcesser()).thread(10).run();
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment