Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
11ba5beb
Commit
11ba5beb
authored
Apr 25, 2014
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactor]move monitor to webmagic-extension #98
parent
d61f65ce
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
112 additions
and
15 deletions
+112
-15
Request.java
...gic-core/src/main/java/us/codecraft/webmagic/Request.java
+1
-1
Site.java
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+1
-1
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+0
-1
SpiderListener.java
...e/src/main/java/us/codecraft/webmagic/SpiderListener.java
+3
-3
HttpClientDownloader.java
...s/codecraft/webmagic/downloader/HttpClientDownloader.java
+1
-1
LocalDuplicatedRemovedScheduler.java
...t/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
+0
-1
MonitorableScheduler.java
...us/codecraft/webmagic/scheduler/MonitorableScheduler.java
+1
-2
HttpConstant.java
...c/main/java/us/codecraft/webmagic/utils/HttpConstant.java
+1
-1
MonitorExample.java
...in/java/us/codecraft/webmagic/example/MonitorExample.java
+31
-0
SpiderMonitor.java
...ain/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+22
-3
SpiderStatus.java
...main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
+1
-0
SpiderStatusMXBean.java
...ava/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
+0
-0
RedisScheduler.java
.../java/us/codecraft/webmagic/scheduler/RedisScheduler.java
+0
-1
IPUtils.java
...on/src/main/java/us/codecraft/webmagic/utils/IPUtils.java
+36
-0
CustomSpiderStatus.java
...ava/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
+0
-0
CustomSpiderStatusMXBean.java
.../codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
+0
-0
SpiderMonitorTest.java
...java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
+0
-0
IPUtilsTest.java
...rc/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java
+14
-0
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
View file @
11ba5beb
...
@@ -111,7 +111,7 @@ public class Request implements Serializable {
...
@@ -111,7 +111,7 @@ public class Request implements Serializable {
/**
/**
* The http method of the request. Get for default.
* The http method of the request. Get for default.
* @return httpMethod
* @return httpMethod
* @see us.codecraft.webmagic.
constant
.HttpConstant.Method
* @see us.codecraft.webmagic.
utils
.HttpConstant.Method
* @since 0.5.0
* @since 0.5.0
*/
*/
public
String
getMethod
()
{
public
String
getMethod
()
{
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
View file @
11ba5beb
...
@@ -50,7 +50,7 @@ public class Site {
...
@@ -50,7 +50,7 @@ public class Site {
private
boolean
useGzip
=
true
;
private
boolean
useGzip
=
true
;
/**
/**
* @see us.codecraft.webmagic.
constant
.HttpConstant.Header
* @see us.codecraft.webmagic.
utils
.HttpConstant.Header
* @deprecated
* @deprecated
*/
*/
public
static
interface
HeaderConst
{
public
static
interface
HeaderConst
{
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
11ba5beb
...
@@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
...
@@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
import
us.codecraft.webmagic.downloader.Downloader
;
import
us.codecraft.webmagic.downloader.Downloader
;
import
us.codecraft.webmagic.downloader.HttpClientDownloader
;
import
us.codecraft.webmagic.downloader.HttpClientDownloader
;
import
us.codecraft.webmagic.monitor.SpiderListener
;
import
us.codecraft.webmagic.pipeline.CollectorPipeline
;
import
us.codecraft.webmagic.pipeline.CollectorPipeline
;
import
us.codecraft.webmagic.pipeline.ConsolePipeline
;
import
us.codecraft.webmagic.pipeline.ConsolePipeline
;
import
us.codecraft.webmagic.pipeline.Pipeline
;
import
us.codecraft.webmagic.pipeline.Pipeline
;
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/
monitor/
SpiderListener.java
→
webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
monitor
;
package
us
.
codecraft
.
webmagic
;
import
us.codecraft.webmagic.Request
;
/**
/**
* Listener of Spider on page processing. Used for monitoring and so on.
*
* @author code4crafer@gmail.com
* @author code4crafer@gmail.com
* @since 0.5.0
* @since 0.5.0
*/
*/
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
View file @
11ba5beb
...
@@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
...
@@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.
constant
.HttpConstant
;
import
us.codecraft.webmagic.
utils
.HttpConstant
;
import
us.codecraft.webmagic.selector.PlainText
;
import
us.codecraft.webmagic.selector.PlainText
;
import
us.codecraft.webmagic.utils.UrlUtils
;
import
us.codecraft.webmagic.utils.UrlUtils
;
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
View file @
11ba5beb
...
@@ -5,7 +5,6 @@ import org.slf4j.Logger;
...
@@ -5,7 +5,6 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.monitor.MonitorableScheduler
;
import
java.util.Set
;
import
java.util.Set
;
import
java.util.concurrent.ConcurrentHashMap
;
import
java.util.concurrent.ConcurrentHashMap
;
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/
monito
r/MonitorableScheduler.java
→
webmagic-core/src/main/java/us/codecraft/webmagic/
schedule
r/MonitorableScheduler.java
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
monito
r
;
package
us
.
codecraft
.
webmagic
.
schedule
r
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.scheduler.Scheduler
;
/**
/**
* The scheduler whose requests can be counted for monitor.
* The scheduler whose requests can be counted for monitor.
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/
constant
/HttpConstant.java
→
webmagic-core/src/main/java/us/codecraft/webmagic/
utils
/HttpConstant.java
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
constant
;
package
us
.
codecraft
.
webmagic
.
utils
;
/**
/**
* Some constants of the HTTP protocol.
* Some constants of the HTTP protocol.
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java
0 → 100644
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
example
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.monitor.SpiderMonitor
;
import
us.codecraft.webmagic.processor.example.GithubRepoPageProcessor
;
import
us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor
;
/**
 * Example showing how to attach a {@link SpiderMonitor} to spiders before starting them.
 * <p>
 * Two spiders are created, registered with a single monitor, the monitor is switched to
 * server mode and its JMX endpoint is started, and finally both spiders are started.
 *
 * @author code4crafer@gmail.com
 */
public class MonitorExample {

    /**
     * Builds the two example spiders, wires them to a monitor and starts them.
     *
     * @param args command line arguments; not read by this example
     * @throws Exception propagated from the monitor setup calls
     */
    public static void main(String[] args) throws Exception {

        // Blog crawler; thread(2) presumably sets two worker threads - TODO confirm against Spider.
        Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor())
                .addUrl("http://my.oschina.net/flashsword/blog").thread(2);
        Spider githubSpider = Spider.create(new GithubRepoPageProcessor())
                .addUrl("https://github.com/code4craft");

        // One monitor instance tracks both spiders.
        SpiderMonitor spiderMonitor = new SpiderMonitor();
        spiderMonitor.register(oschinaSpider, githubSpider);
        // To make the monitor reachable remotely, use spiderMonitor.server().jmxStart();
        // Only ONE server can be started per machine;
        // the others will be registered.
        // NOTE(review): server() is invoked twice in a row below. This looks like it may have
        // been meant to be server().jmxStart() as described above - confirm with the author.
        spiderMonitor.server().server();
        spiderMonitor.jmxStart();
        oschinaSpider.start();
        githubSpider.start();
    }
}
webmagic-
core
/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
→
webmagic-
extension
/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
monitor
;
package
us
.
codecraft
.
webmagic
.
monitor
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.SpiderListener
;
import
us.codecraft.webmagic.processor.example.GithubRepoPageProcessor
;
import
us.codecraft.webmagic.processor.example.GithubRepoPageProcessor
;
import
us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor
;
import
us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor
;
import
us.codecraft.webmagic.utils.IPUtils
;
import
javax.management.JMException
;
import
javax.management.JMException
;
import
javax.management.MBeanServer
;
import
javax.management.MBeanServer
;
...
@@ -15,6 +19,7 @@ import java.io.IOException;
...
@@ -15,6 +19,7 @@ import java.io.IOException;
import
java.lang.management.ManagementFactory
;
import
java.lang.management.ManagementFactory
;
import
java.rmi.registry.LocateRegistry
;
import
java.rmi.registry.LocateRegistry
;
import
java.rmi.registry.Registry
;
import
java.rmi.registry.Registry
;
import
java.rmi.server.ExportException
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
...
@@ -30,6 +35,8 @@ public class SpiderMonitor {
...
@@ -30,6 +35,8 @@ public class SpiderMonitor {
Server
,
Client
,
Local
;
Server
,
Client
,
Local
;
}
}
private
Logger
logger
=
LoggerFactory
.
getLogger
(
getClass
());
private
static
final
int
DEFAULT_SERVER_PORT
=
14721
;
private
static
final
int
DEFAULT_SERVER_PORT
=
14721
;
private
static
final
String
DEFAULT_SERVER_HOST
=
"localhost"
;
private
static
final
String
DEFAULT_SERVER_HOST
=
"localhost"
;
...
@@ -52,6 +59,7 @@ public class SpiderMonitor {
...
@@ -52,6 +59,7 @@ public class SpiderMonitor {
/**
/**
* Register spider for monitor.
* Register spider for monitor.
*
* @param spiders
* @param spiders
* @return
* @return
*/
*/
...
@@ -113,13 +121,18 @@ public class SpiderMonitor {
...
@@ -113,13 +121,18 @@ public class SpiderMonitor {
/**
/**
* Start monitor as server mode.
* Start monitor as server mode.
*
* @param port
* @param port
* @return
* @return
* @throws IOException
* @throws IOException
* @throws JMException
* @throws JMException
*/
*/
public
SpiderMonitor
server
(
int
port
)
throws
IOException
,
JMException
{
public
SpiderMonitor
server
(
int
port
)
throws
IOException
,
JMException
{
try
{
Registry
registry
=
LocateRegistry
.
createRegistry
(
port
);
Registry
registry
=
LocateRegistry
.
createRegistry
(
port
);
}
catch
(
ExportException
e
)
{
logger
.
warn
(
"Start server fail, maybe the address is in using."
,
e
);
}
serverPort
=
port
;
serverPort
=
port
;
serverHost
=
"localhost"
;
serverHost
=
"localhost"
;
type
=
Type
.
Server
;
type
=
Type
.
Server
;
...
@@ -128,6 +141,7 @@ public class SpiderMonitor {
...
@@ -128,6 +141,7 @@ public class SpiderMonitor {
/**
/**
* Start monitor as server mode.
* Start monitor as server mode.
*
* @return
* @return
* @throws IOException
* @throws IOException
* @throws JMException
* @throws JMException
...
@@ -139,6 +153,7 @@ public class SpiderMonitor {
...
@@ -139,6 +153,7 @@ public class SpiderMonitor {
/**
/**
* Start monitor as client mode.
* Start monitor as client mode.
*
* @param serverHost
* @param serverHost
* @param serverPort
* @param serverPort
* @return
* @return
...
@@ -154,6 +169,7 @@ public class SpiderMonitor {
...
@@ -154,6 +169,7 @@ public class SpiderMonitor {
/**
/**
* Start monitor as client mode.
* Start monitor as client mode.
*
* @return
* @return
* @throws IOException
* @throws IOException
* @throws JMException
* @throws JMException
...
@@ -167,7 +183,7 @@ public class SpiderMonitor {
...
@@ -167,7 +183,7 @@ public class SpiderMonitor {
}
}
public
SpiderMonitor
jmxStart
(
String
jndiServer
,
int
rmiPort
)
throws
IOException
,
JMException
{
public
SpiderMonitor
jmxStart
(
String
jndiServer
,
int
rmiPort
)
throws
IOException
,
JMException
{
String
jmxServerName
=
"WebMagic
"
;
String
jmxServerName
=
"WebMagic
-"
+
IPUtils
.
getFirstNoLoopbackIPAddresses
()
;
// start JNDI
// start JNDI
MBeanServer
localServer
=
ManagementFactory
.
getPlatformMBeanServer
();
MBeanServer
localServer
=
ManagementFactory
.
getPlatformMBeanServer
();
...
@@ -199,7 +215,10 @@ public class SpiderMonitor {
...
@@ -199,7 +215,10 @@ public class SpiderMonitor {
SpiderMonitor
spiderMonitor
=
new
SpiderMonitor
();
SpiderMonitor
spiderMonitor
=
new
SpiderMonitor
();
spiderMonitor
.
register
(
oschinaSpider
,
githubSpider
);
spiderMonitor
.
register
(
oschinaSpider
,
githubSpider
);
//
//If you want to connect it from remote, use spiderMonitor.server().jmxStart();
//ONLY ONE server can start for a machine.
//Others will be registered
spiderMonitor
.
server
().
server
();
spiderMonitor
.
jmxStart
();
spiderMonitor
.
jmxStart
();
oschinaSpider
.
start
();
oschinaSpider
.
start
();
githubSpider
.
start
();
githubSpider
.
start
();
...
...
webmagic-
core
/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
→
webmagic-
extension
/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
View file @
11ba5beb
...
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
...
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.Spider
;
import
us.codecraft.webmagic.scheduler.MonitorableScheduler
;
import
java.util.List
;
import
java.util.List
;
...
...
webmagic-
core
/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
→
webmagic-
extension
/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
View file @
11ba5beb
File moved
webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
View file @
11ba5beb
...
@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
...
@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
import
redis.clients.jedis.JedisPoolConfig
;
import
redis.clients.jedis.JedisPoolConfig
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.monitor.MonitorableScheduler
;
/**
/**
* Use Redis as url scheduler for distributed crawlers.<br>
* Use Redis as url scheduler for distributed crawlers.<br>
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java
0 → 100644
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
utils
;
import
java.net.Inet6Address
;
import
java.net.InetAddress
;
import
java.net.NetworkInterface
;
import
java.net.SocketException
;
import
java.util.Enumeration
;
/**
 * Helpers for discovering the IP addresses of the local machine.
 *
 * @author code4crafer@gmail.com
 * @since 0.5.0
 */
public abstract class IPUtils {

    /**
     * Looks up an address of this machine that is not a loopback address.
     * <p>
     * Interfaces and their addresses are scanned in the order the JDK enumerates them.
     * The first non-loopback address that is not an {@link Inet6Address} is returned
     * immediately. If only IPv6 non-loopback addresses exist, the last one seen during
     * the scan is returned.
     *
     * @return the textual form of the selected address, never {@code null}
     * @throws SocketException if the interfaces cannot be enumerated, or if the machine
     *                         has no non-loopback address at all
     */
    public static String getFirstNoLoopbackIPAddresses() throws SocketException {
        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
        if (networkInterfaces == null) {
            // Older JDKs report "no interfaces" as null instead of an empty enumeration.
            throw new SocketException("No network interface found on this machine");
        }

        // Remembers a non-loopback IPv6 address in case no IPv4 one turns up.
        InetAddress fallbackAddress = null;
        while (networkInterfaces.hasMoreElements()) {
            NetworkInterface networkInterface = networkInterfaces.nextElement();
            Enumeration<InetAddress> inetAddresses = networkInterface.getInetAddresses();
            while (inetAddresses.hasMoreElements()) {
                InetAddress address = inetAddresses.nextElement();
                if (address.isLoopbackAddress()) {
                    continue;
                }
                if (!(address instanceof Inet6Address)) {
                    return address.getHostAddress();
                }
                fallbackAddress = address;
            }
        }

        if (fallbackAddress == null) {
            // Previously this path dereferenced null and failed with a NullPointerException.
            throw new SocketException("No non-loopback IP address found on this machine");
        }
        return fallbackAddress.getHostAddress();
    }
}
webmagic-
core
/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
→
webmagic-
extension
/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
View file @
11ba5beb
File moved
webmagic-
core
/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
→
webmagic-
extension
/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
View file @
11ba5beb
File moved
webmagic-
core
/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
→
webmagic-
extension
/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
View file @
11ba5beb
File moved
webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java
0 → 100644
View file @
11ba5beb
package
us
.
codecraft
.
webmagic
.
utils
;
import
org.junit.Test
;
/**
 * Smoke test for {@link IPUtils}.
 *
 * @author code4crafer@gmail.com
 */
public class IPUtilsTest {

    /**
     * Resolves the machine's non-loopback address and prints it to standard output.
     * There is no assertion; the test only fails if the lookup throws.
     *
     * @throws Exception if the address lookup fails
     */
    @Test
    public void testGetFirstNoLoopbackIPAddresses() throws Exception {
        final String resolvedAddress = IPUtils.getFirstNoLoopbackIPAddresses();
        System.out.println(resolvedAddress);
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment