Commit fba33087 authored by yihua.huang

fix a thread pool exception

parent 3c79d031
...@@ -21,21 +21,26 @@ import java.util.concurrent.atomic.AtomicInteger; ...@@ -21,21 +21,26 @@ import java.util.concurrent.atomic.AtomicInteger;
/** /**
* Entrance of a crawler.<br> * Entrance of a crawler.<br>
* A spider contains four modules: Downloader, Scheduler, PageProcessor and Pipeline.<br> * A spider contains four modules: Downloader, Scheduler, PageProcessor and
* Pipeline.<br>
* Every module is a field of Spider. <br> * Every module is a field of Spider. <br>
* The modules are defined in interface. <br> * The modules are defined in interface. <br>
* You can customize a spider with various implementations of them. <br> * You can customize a spider with various implementations of them. <br>
* Examples: <br> * Examples: <br>
* <br> * <br>
* A simple crawler: <br> * A simple crawler: <br>
* Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")).run();<br> * Spider.create(new SimplePageProcessor("http://my.oschina.net/",
* "http://my.oschina.net/*blog/*")).run();<br>
* <br> * <br>
* Store results to files by FilePipeline: <br> * Store results to files by FilePipeline: <br>
* Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")) <br> * Spider.create(new SimplePageProcessor("http://my.oschina.net/",
* "http://my.oschina.net/*blog/*")) <br>
* .pipeline(new FilePipeline("/data/temp/webmagic/")).run(); <br> * .pipeline(new FilePipeline("/data/temp/webmagic/")).run(); <br>
* <br> * <br>
* Use FileCacheQueueScheduler to store urls and cursor in files, so that a Spider can resume the status when shutdown. <br> * Use FileCacheQueueScheduler to store urls and cursor in files, so that a
* Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")) <br> * Spider can resume the status when shutdown. <br>
* Spider.create(new SimplePageProcessor("http://my.oschina.net/",
* "http://my.oschina.net/*blog/*")) <br>
* .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run(); <br> * .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run(); <br>
* *
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
...@@ -221,8 +226,7 @@ public class Spider implements Runnable, Task { ...@@ -221,8 +226,7 @@ public class Spider implements Runnable, Task {
@Override @Override
public void run() { public void run() {
if (!stat.compareAndSet(STAT_INIT, STAT_RUNNING) if (!stat.compareAndSet(STAT_INIT, STAT_RUNNING) && !stat.compareAndSet(STAT_STOPPED, STAT_RUNNING)) {
&& !stat.compareAndSet(STAT_STOPPED, STAT_RUNNING)) {
throw new IllegalStateException("Spider is already running!"); throw new IllegalStateException("Spider is already running!");
} }
checkComponent(); checkComponent();
...@@ -233,7 +237,8 @@ public class Spider implements Runnable, Task { ...@@ -233,7 +237,8 @@ public class Spider implements Runnable, Task {
startUrls.clear(); startUrls.clear();
} }
Request request = scheduler.poll(this); Request request = scheduler.poll(this);
//single thread logger.info("Spider " + getUUID() + " started!");
// single thread
if (threadNum <= 1) { if (threadNum <= 1) {
while (request != null && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) { while (request != null && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
processRequest(request); processRequest(request);
...@@ -243,11 +248,12 @@ public class Spider implements Runnable, Task { ...@@ -243,11 +248,12 @@ public class Spider implements Runnable, Task {
synchronized (this) { synchronized (this) {
this.executorService = ThreadUtils.newFixedThreadPool(threadNum); this.executorService = ThreadUtils.newFixedThreadPool(threadNum);
} }
//multi thread // multi thread
final AtomicInteger threadAlive = new AtomicInteger(0); final AtomicInteger threadAlive = new AtomicInteger(0);
while (true && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) { while (true && stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
if (request == null) { if (request == null) {
//when no request found but some thread is alive, sleep a while. // when no request found but some thread is alive, sleep a
// while.
try { try {
Thread.sleep(100); Thread.sleep(100);
} catch (InterruptedException e) { } catch (InterruptedException e) {
...@@ -274,7 +280,7 @@ public class Spider implements Runnable, Task { ...@@ -274,7 +280,7 @@ public class Spider implements Runnable, Task {
executorService.shutdown(); executorService.shutdown();
} }
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED); stat.compareAndSet(STAT_RUNNING, STAT_STOPPED);
//release some resources // release some resources
destroy(); destroy();
} }
...@@ -299,7 +305,8 @@ public class Spider implements Runnable, Task { ...@@ -299,7 +305,8 @@ public class Spider implements Runnable, Task {
/** /**
* Process specific urls without url discovering. * Process specific urls without url discovering.
* *
* @param urls urls to process * @param urls
* urls to process
*/ */
public void test(String... urls) { public void test(String... urls) {
checkComponent(); checkComponent();
...@@ -316,7 +323,7 @@ public class Spider implements Runnable, Task { ...@@ -316,7 +323,7 @@ public class Spider implements Runnable, Task {
sleep(site.getSleepTime()); sleep(site.getSleepTime());
return; return;
} }
//for cycle retry // for cycle retry
if (page.getHtml() == null) { if (page.getHtml() == null) {
addRequest(page); addRequest(page);
sleep(site.getSleepTime()); sleep(site.getSleepTime());
...@@ -365,9 +372,15 @@ public class Spider implements Runnable, Task { ...@@ -365,9 +372,15 @@ public class Spider implements Runnable, Task {
} }
public void stop() { public void stop() {
stat.compareAndSet(STAT_RUNNING, STAT_STOPPED); if (stat.compareAndSet(STAT_RUNNING, STAT_STOPPED)) {
if (executorService != null) {
executorService.shutdown(); executorService.shutdown();
} }
logger.info("Spider " + getUUID() + " stop success!");
} else {
logger.info("Spider " + getUUID() + " stop fail!");
}
}
public void stopAndDestroy() { public void stopAndDestroy() {
stop(); stop();
......
package us.codecraft.webmagic.utils; package us.codecraft.webmagic.utils;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
...@@ -12,21 +12,10 @@ import java.util.concurrent.TimeUnit; ...@@ -12,21 +12,10 @@ import java.util.concurrent.TimeUnit;
public class ThreadUtils { public class ThreadUtils {
public static ExecutorService newFixedThreadPool(int threadSize) { public static ExecutorService newFixedThreadPool(int threadSize) {
return new ThreadPoolExecutor(threadSize, threadSize, 0L, TimeUnit.MILLISECONDS, if (threadSize <= 1) {
new LinkedBlockingQueue<Runnable>(1) { throw new IllegalArgumentException("ThreadSize must be greater than 1!");
private static final long serialVersionUID = -9028058603126367678L;
@Override
public boolean offer(Runnable e) {
try {
put(e);
return true;
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
return false;
} }
}); return new ThreadPoolExecutor(threadSize - 1, threadSize - 1, 0L, TimeUnit.MILLISECONDS,
new SynchronousQueue<Runnable>(), new ThreadPoolExecutor.CallerRunsPolicy());
} }
} }
...@@ -18,11 +18,12 @@ public class SpiderTest { ...@@ -18,11 +18,12 @@ public class SpiderTest {
public void process(ResultItems resultItems, Task task) { public void process(ResultItems resultItems, Task task) {
System.out.println(1); System.out.println(1);
} }
}); }).thread(2);
spider.start(); spider.start();
Thread.sleep(10000); Thread.sleep(10000);
spider.stop(); spider.stop();
// spider.run(); Thread.sleep(10000);
spider.start();
Thread.sleep(10000); Thread.sleep(10000);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment