Commit 11ba5beb authored by yihua.huang's avatar yihua.huang

[refactor]move monitor to webmagic-extension #98

parent d61f65ce
...@@ -111,7 +111,7 @@ public class Request implements Serializable { ...@@ -111,7 +111,7 @@ public class Request implements Serializable {
/** /**
* The http method of the request. Get for default. * The http method of the request. Get for default.
* @return httpMethod * @return httpMethod
* @see us.codecraft.webmagic.constant.HttpConstant.Method * @see us.codecraft.webmagic.utils.HttpConstant.Method
* @since 0.5.0 * @since 0.5.0
*/ */
public String getMethod() { public String getMethod() {
......
...@@ -50,7 +50,7 @@ public class Site { ...@@ -50,7 +50,7 @@ public class Site {
private boolean useGzip = true; private boolean useGzip = true;
/** /**
* @see us.codecraft.webmagic.constant.HttpConstant.Header * @see us.codecraft.webmagic.utils.HttpConstant.Header
* @deprecated * @deprecated
*/ */
public static interface HeaderConst { public static interface HeaderConst {
......
...@@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory; ...@@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader; import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.monitor.SpiderListener;
import us.codecraft.webmagic.pipeline.CollectorPipeline; import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline; import us.codecraft.webmagic.pipeline.Pipeline;
......
package us.codecraft.webmagic.monitor; package us.codecraft.webmagic;
import us.codecraft.webmagic.Request;
/** /**
* Listener of Spider on page processing. Used for monitoring and so on.
*
* @author code4crafer@gmail.com * @author code4crafer@gmail.com
* @since 0.5.0 * @since 0.5.0
*/ */
......
...@@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page; ...@@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.constant.HttpConstant; import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
......
...@@ -5,7 +5,6 @@ import org.slf4j.Logger; ...@@ -5,7 +5,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.monitor.MonitorableScheduler;
import java.util.Set; import java.util.Set;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
......
package us.codecraft.webmagic.monitor; package us.codecraft.webmagic.scheduler;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.Scheduler;
/** /**
* The scheduler whose requests can be counted for monitor. * The scheduler whose requests can be counted for monitor.
......
package us.codecraft.webmagic.constant; package us.codecraft.webmagic.utils;
/** /**
* Some constants of Http protocol. * Some constants of Http protocol.
......
package us.codecraft.webmagic.example;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.monitor.SpiderMonitor;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
/**
 * Example showing how to register several spiders with a {@link SpiderMonitor},
 * start the monitor in server mode and expose it through JMX before the
 * spiders begin crawling.
 *
 * @author code4crafer@gmail.com
 */
public class MonitorExample {

    /**
     * Builds two example spiders, registers them with one monitor, starts the
     * monitor and then starts both spiders.
     *
     * @param args ignored
     * @throws Exception if starting the monitor fails
     */
    public static void main(String[] args) throws Exception {
        Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor())
                .addUrl("http://my.oschina.net/flashsword/blog").thread(2);
        Spider githubSpider = Spider.create(new GithubRepoPageProcessor())
                .addUrl("https://github.com/code4craft");

        SpiderMonitor spiderMonitor = new SpiderMonitor();
        spiderMonitor.register(oschinaSpider, githubSpider);

        // To connect from a remote machine the monitor must run in server mode
        // before JMX is started. Only one server can be started per machine.
        // server() is called exactly once: a second call would presumably try to
        // create the RMI registry on the same port again and only log a warning
        // (NOTE(review): confirm against SpiderMonitor.server()).
        spiderMonitor.server();
        spiderMonitor.jmxStart();

        oschinaSpider.start();
        githubSpider.start();
    }
}
package us.codecraft.webmagic.monitor; package us.codecraft.webmagic.monitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.SpiderListener;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor; import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor; import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
import us.codecraft.webmagic.utils.IPUtils;
import javax.management.JMException; import javax.management.JMException;
import javax.management.MBeanServer; import javax.management.MBeanServer;
...@@ -15,6 +19,7 @@ import java.io.IOException; ...@@ -15,6 +19,7 @@ import java.io.IOException;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
import java.rmi.registry.LocateRegistry; import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry; import java.rmi.registry.Registry;
import java.rmi.server.ExportException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
...@@ -30,6 +35,8 @@ public class SpiderMonitor { ...@@ -30,6 +35,8 @@ public class SpiderMonitor {
Server, Client, Local; Server, Client, Local;
} }
private Logger logger = LoggerFactory.getLogger(getClass());
private static final int DEFAULT_SERVER_PORT = 14721; private static final int DEFAULT_SERVER_PORT = 14721;
private static final String DEFAULT_SERVER_HOST = "localhost"; private static final String DEFAULT_SERVER_HOST = "localhost";
...@@ -52,6 +59,7 @@ public class SpiderMonitor { ...@@ -52,6 +59,7 @@ public class SpiderMonitor {
/** /**
* Register spider for monitor. * Register spider for monitor.
*
* @param spiders * @param spiders
* @return * @return
*/ */
...@@ -113,13 +121,18 @@ public class SpiderMonitor { ...@@ -113,13 +121,18 @@ public class SpiderMonitor {
/** /**
* Start monitor as server mode. * Start monitor as server mode.
*
* @param port * @param port
* @return * @return
* @throws IOException * @throws IOException
* @throws JMException * @throws JMException
*/ */
public SpiderMonitor server(int port) throws IOException, JMException { public SpiderMonitor server(int port) throws IOException, JMException {
try {
Registry registry = LocateRegistry.createRegistry(port); Registry registry = LocateRegistry.createRegistry(port);
} catch (ExportException e) {
logger.warn("Start server fail, maybe the address is in using.", e);
}
serverPort = port; serverPort = port;
serverHost = "localhost"; serverHost = "localhost";
type = Type.Server; type = Type.Server;
...@@ -128,6 +141,7 @@ public class SpiderMonitor { ...@@ -128,6 +141,7 @@ public class SpiderMonitor {
/** /**
* Start monitor as server mode. * Start monitor as server mode.
*
* @return * @return
* @throws IOException * @throws IOException
* @throws JMException * @throws JMException
...@@ -139,6 +153,7 @@ public class SpiderMonitor { ...@@ -139,6 +153,7 @@ public class SpiderMonitor {
/** /**
* Start monitor as client mode. * Start monitor as client mode.
*
* @param serverHost * @param serverHost
* @param serverPort * @param serverPort
* @return * @return
...@@ -154,6 +169,7 @@ public class SpiderMonitor { ...@@ -154,6 +169,7 @@ public class SpiderMonitor {
/** /**
* Start monitor as client mode. * Start monitor as client mode.
*
* @return * @return
* @throws IOException * @throws IOException
* @throws JMException * @throws JMException
...@@ -167,7 +183,7 @@ public class SpiderMonitor { ...@@ -167,7 +183,7 @@ public class SpiderMonitor {
} }
public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException { public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException {
String jmxServerName = "WebMagic"; String jmxServerName = "WebMagic-"+ IPUtils.getFirstNoLoopbackIPAddresses();
// start JNDI // start JNDI
MBeanServer localServer = ManagementFactory.getPlatformMBeanServer(); MBeanServer localServer = ManagementFactory.getPlatformMBeanServer();
...@@ -199,7 +215,10 @@ public class SpiderMonitor { ...@@ -199,7 +215,10 @@ public class SpiderMonitor {
SpiderMonitor spiderMonitor = new SpiderMonitor(); SpiderMonitor spiderMonitor = new SpiderMonitor();
spiderMonitor.register(oschinaSpider, githubSpider); spiderMonitor.register(oschinaSpider, githubSpider);
// //If you want to connect it from remote, use spiderMonitor.server().jmxStart();
//ONLY ONE server can start for a machine.
//Others will be registered
spiderMonitor.server().server();
spiderMonitor.jmxStart(); spiderMonitor.jmxStart();
oschinaSpider.start(); oschinaSpider.start();
githubSpider.start(); githubSpider.start();
......
...@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor; ...@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.MonitorableScheduler;
import java.util.List; import java.util.List;
......
...@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool; ...@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig; import redis.clients.jedis.JedisPoolConfig;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.monitor.MonitorableScheduler;
/** /**
* Use Redis as url scheduler for distributed crawlers.<br> * Use Redis as url scheduler for distributed crawlers.<br>
......
package us.codecraft.webmagic.utils;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.util.Enumeration;
/**
 * Helpers for looking up IP addresses of the local machine.
 *
 * @author code4crafer@gmail.com
 * @since 0.5.0
 */
public abstract class IPUtils {

    /**
     * Returns the textual form of a non-loopback address of this machine.
     * <p>
     * All addresses of all network interfaces are scanned in enumeration order.
     * The first non-loopback address that is not an {@link Inet6Address} is
     * returned immediately. If no such address exists, the last non-loopback
     * IPv6 address seen during the scan is returned instead.
     *
     * @return the host address string of the selected address, never {@code null}
     * @throws SocketException if the interfaces cannot be enumerated, or if the
     *                         machine has no non-loopback address at all
     */
    public static String getFirstNoLoopbackIPAddresses() throws SocketException {
        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
        // Older JDKs document a null return when the machine has no interfaces.
        if (networkInterfaces == null) {
            throw new SocketException("No network interface found on this machine");
        }

        InetAddress ipv6Fallback = null;
        while (networkInterfaces.hasMoreElements()) {
            NetworkInterface networkInterface = networkInterfaces.nextElement();
            Enumeration<InetAddress> inetAddresses = networkInterface.getInetAddresses();
            while (inetAddresses.hasMoreElements()) {
                InetAddress address = inetAddresses.nextElement();
                if (address.isLoopbackAddress()) {
                    continue;
                }
                if (!(address instanceof Inet6Address)) {
                    // Preferred result: a non-loopback, non-IPv6 address.
                    return address.getHostAddress();
                }
                // Remember the IPv6 address in case nothing better turns up.
                ipv6Fallback = address;
            }
        }

        // Previously this dereferenced null and failed with a NullPointerException.
        if (ipv6Fallback == null) {
            throw new SocketException("No non-loopback IP address found on this machine");
        }
        return ipv6Fallback.getHostAddress();
    }
}
package us.codecraft.webmagic.utils;
import org.junit.Test;
/**
 * Tests for {@link IPUtils}.
 *
 * @author code4crafer@gmail.com
 */
public class IPUtilsTest {

    /**
     * Checks that the lookup yields a usable address string. The concrete value
     * depends on the machine running the test, so only its presence is verified.
     */
    @Test
    public void testGetFirstNoLoopbackIPAddresses() throws Exception {
        String address = IPUtils.getFirstNoLoopbackIPAddresses();
        System.out.println(address);
        if (address == null || address.isEmpty()) {
            throw new AssertionError("Expected a non-empty IP address but got: " + address);
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment