Commit 11ba5beb authored by yihua.huang's avatar yihua.huang

[refactor]move monitor to webmagic-extension #98

parent d61f65ce
......@@ -111,7 +111,7 @@ public class Request implements Serializable {
/**
* The http method of the request. Get for default.
* @return httpMethod
* @see us.codecraft.webmagic.constant.HttpConstant.Method
* @see us.codecraft.webmagic.utils.HttpConstant.Method
* @since 0.5.0
*/
public String getMethod() {
......
......@@ -50,7 +50,7 @@ public class Site {
private boolean useGzip = true;
/**
* @see us.codecraft.webmagic.constant.HttpConstant.Header
* @see us.codecraft.webmagic.utils.HttpConstant.Header
* @deprecated
*/
public static interface HeaderConst {
......
......@@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.monitor.SpiderListener;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline;
......
package us.codecraft.webmagic.monitor;
import us.codecraft.webmagic.Request;
package us.codecraft.webmagic;
/**
* Listener of Spider on page processing. Used for monitoring and so on.
*
* @author code4crafer@gmail.com
* @since 0.5.0
*/
......
......@@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.constant.HttpConstant;
import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils;
......
......@@ -5,7 +5,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.monitor.MonitorableScheduler;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
......
package us.codecraft.webmagic.monitor;
package us.codecraft.webmagic.scheduler;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.Scheduler;
/**
* The scheduler whose requests can be counted for monitor.
......
package us.codecraft.webmagic.constant;
package us.codecraft.webmagic.utils;
/**
* Some constants of the HTTP protocol.
......
package us.codecraft.webmagic.example;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.monitor.SpiderMonitor;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
/**
* @author code4crafer@gmail.com
*/
public class MonitorExample {

    /**
     * Builds two example spiders, attaches them to a {@link SpiderMonitor},
     * starts the monitor and finally launches both crawls.
     *
     * @param args command line arguments (unused)
     * @throws Exception if starting the monitor fails
     */
    public static void main(String[] args) throws Exception {
        Spider oschinaCrawler = Spider.create(new OschinaBlogPageProcessor())
                .addUrl("http://my.oschina.net/flashsword/blog").thread(2);
        Spider githubCrawler = Spider.create(new GithubRepoPageProcessor())
                .addUrl("https://github.com/code4craft");

        startMonitoring(oschinaCrawler, githubCrawler);

        oschinaCrawler.start();
        githubCrawler.start();
    }

    /**
     * Registers both spiders with a fresh monitor and brings the monitor up.
     *
     * @param first  the first spider to monitor
     * @param second the second spider to monitor
     * @throws Exception if the monitor server or JMX start fails
     */
    private static void startMonitoring(Spider first, Spider second) throws Exception {
        SpiderMonitor monitor = new SpiderMonitor();
        monitor.register(first, second);
        // For remote connections, use monitor.server().jmxStart() instead.
        // Only ONE server can be started per machine; the others will be registered.
        monitor.server().server();
        monitor.jmxStart();
    }
}
package us.codecraft.webmagic.monitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.SpiderListener;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
import us.codecraft.webmagic.utils.IPUtils;
import javax.management.JMException;
import javax.management.MBeanServer;
......@@ -15,6 +19,7 @@ import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import java.rmi.server.ExportException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
......@@ -30,6 +35,8 @@ public class SpiderMonitor {
Server, Client, Local;
}
private Logger logger = LoggerFactory.getLogger(getClass());
private static final int DEFAULT_SERVER_PORT = 14721;
private static final String DEFAULT_SERVER_HOST = "localhost";
......@@ -52,6 +59,7 @@ public class SpiderMonitor {
/**
* Register spider for monitor.
*
* @param spiders
* @return
*/
......@@ -113,13 +121,18 @@ public class SpiderMonitor {
/**
* Start monitor as server mode.
*
* @param port
* @return
* @throws IOException
* @throws JMException
*/
public SpiderMonitor server(int port) throws IOException, JMException {
Registry registry = LocateRegistry.createRegistry(port);
try {
Registry registry = LocateRegistry.createRegistry(port);
} catch (ExportException e) {
logger.warn("Start server fail, maybe the address is in using.", e);
}
serverPort = port;
serverHost = "localhost";
type = Type.Server;
......@@ -128,6 +141,7 @@ public class SpiderMonitor {
/**
* Start monitor as server mode.
*
* @return
* @throws IOException
* @throws JMException
......@@ -139,6 +153,7 @@ public class SpiderMonitor {
/**
* Start monitor as client mode.
*
* @param serverHost
* @param serverPort
* @return
......@@ -154,6 +169,7 @@ public class SpiderMonitor {
/**
* Start monitor as client mode.
*
* @return
* @throws IOException
* @throws JMException
......@@ -167,7 +183,7 @@ public class SpiderMonitor {
}
public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException {
String jmxServerName = "WebMagic";
String jmxServerName = "WebMagic-"+ IPUtils.getFirstNoLoopbackIPAddresses();
// start JNDI
MBeanServer localServer = ManagementFactory.getPlatformMBeanServer();
......@@ -199,7 +215,10 @@ public class SpiderMonitor {
SpiderMonitor spiderMonitor = new SpiderMonitor();
spiderMonitor.register(oschinaSpider, githubSpider);
//
//If you want to connect it from remote, use spiderMonitor.server().jmxStart();
//ONLY ONE server can start for a machine.
//Others will be registered
spiderMonitor.server().server();
spiderMonitor.jmxStart();
oschinaSpider.start();
githubSpider.start();
......
......@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.MonitorableScheduler;
import java.util.List;
......
......@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.monitor.MonitorableScheduler;
/**
* Use Redis as url scheduler for distributed crawlers.<br>
......
package us.codecraft.webmagic.utils;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.util.Enumeration;
/**
* @author code4crafer@gmail.com
* @since 0.5.0
*/
public abstract class IPUtils {

    /**
     * Looks up a non-loopback IP address of the local machine.
     * <p>
     * All network interfaces are scanned in the order the JVM reports them. The
     * first non-loopback address that is not an IPv6 address is returned
     * immediately. If only non-loopback IPv6 addresses exist, the last one seen
     * during the scan is returned.
     *
     * @return the textual host address of the selected non-loopback address, never null
     * @throws SocketException if the interfaces cannot be enumerated, or if the
     *                         machine has no non-loopback address at all
     */
    public static String getFirstNoLoopbackIPAddresses() throws SocketException {
        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
        // Older JDKs document a null return when the machine has no interfaces.
        if (networkInterfaces == null) {
            throw new SocketException("No network interfaces found on this machine");
        }
        InetAddress ipv6Fallback = null;
        while (networkInterfaces.hasMoreElements()) {
            NetworkInterface networkInterface = networkInterfaces.nextElement();
            Enumeration<InetAddress> inetAddresses = networkInterface.getInetAddresses();
            while (inetAddresses.hasMoreElements()) {
                InetAddress address = inetAddresses.nextElement();
                if (address.isLoopbackAddress()) {
                    continue;
                }
                if (!(address instanceof Inet6Address)) {
                    // Non-IPv6 addresses are preferred: return the first one found.
                    return address.getHostAddress();
                }
                // Remember the IPv6 candidate in case no other address shows up.
                ipv6Fallback = address;
            }
        }
        // Previously this dereferenced a null reference when nothing matched.
        if (ipv6Fallback == null) {
            throw new SocketException("No non-loopback IP address found on this machine");
        }
        return ipv6Fallback.getHostAddress();
    }
}
package us.codecraft.webmagic.utils;
import org.junit.Test;
/**
* @author code4crafer@gmail.com
*/
public class IPUtilsTest {

    /**
     * Checks that {@link IPUtils#getFirstNoLoopbackIPAddresses()} yields a
     * non-empty address string. The value is also printed for manual inspection,
     * because the concrete address depends on the machine running the test.
     *
     * @throws Exception if the address lookup fails
     */
    @Test
    public void testGetFirstNoLoopbackIPAddresses() throws Exception {
        String address = IPUtils.getFirstNoLoopbackIPAddresses();
        System.out.println(address);
        // Plain check instead of a JUnit assertion so no additional import is needed.
        if (address == null || address.length() == 0) {
            throw new AssertionError("Expected a non-empty IP address but got: " + address);
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment