Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
2f89cfc3
Commit
2f89cfc3
authored
Jun 09, 2014
by
zwf
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add test and fix bug of proxy module
parent
2a15bc02
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
242 additions
and
96 deletions
+242
-96
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+2
-0
Proxy.java
...core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
+14
-5
ProxyPool.java
.../src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java
+109
-75
ProxyUtils.java
...src/main/java/us/codecraft/webmagic/utils/ProxyUtils.java
+38
-16
ProxyTest.java
.../src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
+79
-0
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
2f89cfc3
...
@@ -424,6 +424,8 @@ public class Spider implements Runnable, Task {
...
@@ -424,6 +424,8 @@ public class Spider implements Runnable, Task {
pipeline
.
process
(
page
.
getResultItems
(),
this
);
pipeline
.
process
(
page
.
getResultItems
(),
this
);
}
}
}
}
//for proxy status management
request
.
putExtra
(
Request
.
STATUS_CODE
,
page
.
getStatusCode
());
sleep
(
site
.
getSleepTime
());
sleep
(
site
.
getSleepTime
());
}
}
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
View file @
2f89cfc3
...
@@ -9,7 +9,8 @@ import java.util.concurrent.TimeUnit;
...
@@ -9,7 +9,8 @@ import java.util.concurrent.TimeUnit;
import
org.apache.http.HttpHost
;
import
org.apache.http.HttpHost
;
/**
/**
* >>>>Proxy Status
* >>>> Proxy lifecycle
+----------+ +-----+
+----------+ +-----+
| last use | | new |
| last use | | new |
+-----+----+ +---+-+
+-----+----+ +---+-+
...
@@ -44,13 +45,22 @@ import org.apache.http.HttpHost;
...
@@ -44,13 +45,22 @@ import org.apache.http.HttpHost;
| |+-------------------+
| |+-------------------+
+--------+
+--------+
*/
*/
/**
* Object has these status of lifecycle above.<br>
*
* @author yxssfxwzy@sina.com <br>
* @since 0.5.1
* @see ProxyPool
*/
public
class
Proxy
implements
Delayed
,
Serializable
{
public
class
Proxy
implements
Delayed
,
Serializable
{
private
static
final
long
serialVersionUID
=
228939737383625551L
;
private
static
final
long
serialVersionUID
=
228939737383625551L
;
public
static
final
int
ERROR_403
=
403
;
public
static
final
int
ERROR_403
=
403
;
public
static
final
int
ERROR_404
=
404
;
public
static
final
int
ERROR_404
=
404
;
public
static
final
int
ERROR_BANNED
=
10000
;
public
static
final
int
ERROR_BANNED
=
10000
;
// banned by website
public
static
final
int
ERROR_Proxy
=
10001
;
public
static
final
int
ERROR_Proxy
=
10001
;
// the proxy itself failed
public
static
final
int
SUCCESS
=
200
;
public
static
final
int
SUCCESS
=
200
;
private
final
HttpHost
httpHost
;
private
final
HttpHost
httpHost
;
...
@@ -59,7 +69,6 @@ public class Proxy implements Delayed, Serializable {
...
@@ -59,7 +69,6 @@ public class Proxy implements Delayed, Serializable {
private
Long
canReuseTime
=
0L
;
private
Long
canReuseTime
=
0L
;
private
Long
lastBorrowTime
=
System
.
currentTimeMillis
();
private
Long
lastBorrowTime
=
System
.
currentTimeMillis
();
private
Long
responseTime
=
0L
;
private
Long
responseTime
=
0L
;
private
Long
idleTime
=
0L
;
private
int
failedNum
=
0
;
private
int
failedNum
=
0
;
private
int
successNum
=
0
;
private
int
successNum
=
0
;
...
@@ -143,7 +152,7 @@ public class Proxy implements Delayed, Serializable {
...
@@ -143,7 +152,7 @@ public class Proxy implements Delayed, Serializable {
@Override
@Override
public
long
getDelay
(
TimeUnit
unit
)
{
public
long
getDelay
(
TimeUnit
unit
)
{
return
unit
.
convert
(
canReuseTime
-
System
.
nanoTime
(),
u
nit
.
NANOSECONDS
);
return
unit
.
convert
(
canReuseTime
-
System
.
nanoTime
(),
TimeU
nit
.
NANOSECONDS
);
}
}
@Override
@Override
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java
View file @
2f89cfc3
package
us
.
codecraft
.
webmagic
.
proxy
;
package
us
.
codecraft
.
webmagic
.
proxy
;
import
org.apache.http.HttpHost
;
import
java.io.File
;
import
org.slf4j.Logger
;
import
java.io.FileInputStream
;
import
org.slf4j.LoggerFactory
;
import
java.io.FileNotFoundException
;
import
java.io.FileOutputStream
;
import
java.io.*
;
import
java.io.IOException
;
import
java.io.ObjectInputStream
;
import
java.io.ObjectOutputStream
;
import
java.net.InetAddress
;
import
java.net.InetAddress
;
import
java.net.UnknownHostException
;
import
java.net.UnknownHostException
;
import
java.util.*
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.NoSuchElementException
;
import
java.util.Timer
;
import
java.util.TimerTask
;
import
java.util.Map.Entry
;
import
java.util.Map.Entry
;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.ConcurrentHashMap
;
import
java.util.concurrent.ConcurrentHashMap
;
import
java.util.concurrent.DelayQueue
;
import
java.util.concurrent.DelayQueue
;
import
org.apache.http.HttpHost
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.utils.FilePersistentBase
;
import
us.codecraft.webmagic.utils.ProxyUtils
;
/**
/**
*
ClassName:ProxyPool
*
Pooled Proxy Object
*
*
* @see
* @author yxssfxwzy@sina.com <br>
* @Function: TODO ADD FUNCTION
* @since 0.5.1
* @author ch
* @see Proxy
* @version Ver 1.0
* @Date 2014-2-14 下午01:10:04
*/
*/
public
class
ProxyPool
{
public
class
ProxyPool
{
...
@@ -31,10 +44,14 @@ public class ProxyPool {
...
@@ -31,10 +44,14 @@ public class ProxyPool {
private
int
reuseInterval
=
1500
;
// ms
private
int
reuseInterval
=
1500
;
// ms
private
int
reviveTime
=
2
*
60
*
60
*
1000
;
// ms
private
int
reviveTime
=
2
*
60
*
60
*
1000
;
// ms
private
int
saveProxyInterval
=
10
*
60
*
1000
;
// ms
private
boolean
isEnable
=
false
;
private
boolean
isEnable
=
false
;
private
boolean
validateWhenInit
=
false
;
private
boolean
validateWhenInit
=
false
;
private
String
proxyFile
=
"data/lastUse.proxy"
;
// private boolean isUseLastProxy = true;
private
String
proxyFilePath
=
"/data/webmagic/lastUse.proxy"
;
private
FilePersistentBase
fBase
=
new
FilePersistentBase
();
private
Timer
timer
=
new
Timer
(
true
);
private
Timer
timer
=
new
Timer
(
true
);
private
TimerTask
saveProxyTask
=
new
TimerTask
()
{
private
TimerTask
saveProxyTask
=
new
TimerTask
()
{
...
@@ -47,13 +64,46 @@ public class ProxyPool {
...
@@ -47,13 +64,46 @@ public class ProxyPool {
};
};
public
ProxyPool
()
{
public
ProxyPool
()
{
this
(
null
,
true
);
}
}
public
ProxyPool
(
List
<
String
[]>
httpProxyList
)
{
public
ProxyPool
(
List
<
String
[]>
httpProxyList
)
{
readProxyList
();
this
(
httpProxyList
,
true
);
addProxy
(
httpProxyList
.
toArray
(
new
String
[
httpProxyList
.
size
()][]));
}
timer
.
schedule
(
saveProxyTask
,
10
*
60
*
1000L
,
10
*
60
*
1000
);
/**
 * Creates a pool seeded with the given proxies and, optionally, with the
 * proxies persisted by a previous run.
 *
 * @param httpProxyList  proxies to register, each element being
 *                       <code>{host, port}</code>; may be <code>null</code>
 *                       to register none (the no-arg constructor passes
 *                       <code>null</code>)
 * @param isUseLastProxy when <code>true</code>, resolve the persistence file,
 *                       load the proxies stored in it and start the periodic
 *                       save task
 */
public ProxyPool(List<String[]> httpProxyList, boolean isUseLastProxy) {
    if (httpProxyList != null) {
        addProxy(httpProxyList.toArray(new String[httpProxyList.size()][]));
    }
    if (isUseLastProxy) {
        // The original called setFilePath() inside an "if the file does not
        // exist" check and then once more unconditionally, so the check had no
        // effect and the file setup could run twice. A single call gives the
        // same end state.
        // NOTE(review): if the intent was to fall back to the temp dir only
        // when proxyFilePath is missing, restore the exists() guard instead.
        setFilePath();
        readProxyList();
        // First save runs immediately, then every saveProxyInterval ms. The
        // interval is read once here, so a later setSaveProxyInterval() call
        // does not reschedule this task.
        timer.schedule(saveProxyTask, 0, saveProxyInterval);
    }
}
/**
 * Points the pool at <code>&lt;java.io.tmpdir&gt;/webmagic/lastUse.proxy</code>
 * and creates that file when it does not exist yet.
 * <p>
 * The path is assembled with {@link File} rather than string concatenation.
 * The original appended <code>"webmagic\\lastUse.proxy"</code> directly to the
 * temp dir, which only works on Windows, where the property ends with a
 * separator and <code>\</code> is the separator. On other platforms it
 * produced a single oddly named file next to the temp dir.
 * <p>
 * If the temp dir is missing or not a directory, an error is logged and the
 * previously configured {@code proxyFilePath} is kept. The original stored a
 * path built from the unusable (possibly <code>null</code>) value.
 */
private void setFilePath() {
    String tmpDir = System.getProperty("java.io.tmpdir");
    if (tmpDir == null || !new File(tmpDir).isDirectory()) {
        logger.error("java tmp dir not exists");
        return;
    }

    File proxyDir = new File(tmpDir, "webmagic");
    String path = new File(proxyDir, "lastUse.proxy").getPath();

    fBase.setPath(proxyDir.getPath());
    // NOTE(review): presumably fBase.getFile also prepares the parent
    // directory; if not, createNewFile() fails and the error is logged below.
    File f = fBase.getFile(path);
    if (!f.exists()) {
        try {
            f.createNewFile();
        } catch (IOException e) {
            logger.error("proxy file create error", e);
        }
    }
    this.proxyFilePath = path;
}
}
private
void
saveProxyList
()
{
private
void
saveProxyList
()
{
...
@@ -61,7 +111,7 @@ public class ProxyPool {
...
@@ -61,7 +111,7 @@ public class ProxyPool {
return
;
return
;
}
}
try
{
try
{
ObjectOutputStream
os
=
new
ObjectOutputStream
(
new
FileOutputStream
(
proxyFile
));
ObjectOutputStream
os
=
new
ObjectOutputStream
(
new
FileOutputStream
(
fBase
.
getFile
(
proxyFilePath
)
));
os
.
writeObject
(
prepareForSaving
());
os
.
writeObject
(
prepareForSaving
());
os
.
close
();
os
.
close
();
logger
.
info
(
"save proxy"
);
logger
.
info
(
"save proxy"
);
...
@@ -84,15 +134,15 @@ public class ProxyPool {
...
@@ -84,15 +134,15 @@ public class ProxyPool {
private
void
readProxyList
()
{
private
void
readProxyList
()
{
try
{
try
{
ObjectInputStream
is
=
new
ObjectInputStream
(
new
FileInputStream
(
proxyFile
));
ObjectInputStream
is
=
new
ObjectInputStream
(
new
FileInputStream
(
fBase
.
getFile
(
proxyFilePath
)
));
addProxy
((
Map
<
String
,
Proxy
>)
is
.
readObject
());
addProxy
((
Map
<
String
,
Proxy
>)
is
.
readObject
());
is
.
close
();
is
.
close
();
}
catch
(
FileNotFoundException
e
)
{
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
"
proxy file not found"
,
e
);
logger
.
info
(
"last use
proxy file not found"
,
e
);
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
//
e.printStackTrace();
}
catch
(
ClassNotFoundException
e
)
{
}
catch
(
ClassNotFoundException
e
)
{
e
.
printStackTrace
();
//
e.printStackTrace();
}
}
}
}
...
@@ -103,7 +153,7 @@ public class ProxyPool {
...
@@ -103,7 +153,7 @@ public class ProxyPool {
if
(
allProxy
.
containsKey
(
entry
.
getKey
()))
{
if
(
allProxy
.
containsKey
(
entry
.
getKey
()))
{
continue
;
continue
;
}
}
if
(!
validateWhenInit
||
ProxyUtil
.
validateProxy
(
entry
.
getValue
().
getHttpHost
()))
{
if
(!
validateWhenInit
||
ProxyUtil
s
.
validateProxy
(
entry
.
getValue
().
getHttpHost
()))
{
entry
.
getValue
().
setFailedNum
(
0
);
entry
.
getValue
().
setFailedNum
(
0
);
entry
.
getValue
().
setReuseTimeInterval
(
reuseInterval
);
entry
.
getValue
().
setReuseTimeInterval
(
reuseInterval
);
proxyQueue
.
add
(
entry
.
getValue
());
proxyQueue
.
add
(
entry
.
getValue
());
...
@@ -124,7 +174,7 @@ public class ProxyPool {
...
@@ -124,7 +174,7 @@ public class ProxyPool {
continue
;
continue
;
}
}
HttpHost
item
=
new
HttpHost
(
InetAddress
.
getByName
(
s
[
0
]),
Integer
.
valueOf
(
s
[
1
]));
HttpHost
item
=
new
HttpHost
(
InetAddress
.
getByName
(
s
[
0
]),
Integer
.
valueOf
(
s
[
1
]));
if
(!
validateWhenInit
||
ProxyUtil
.
validateProxy
(
item
))
{
if
(!
validateWhenInit
||
ProxyUtil
s
.
validateProxy
(
item
))
{
Proxy
p
=
new
Proxy
(
item
,
reuseInterval
);
Proxy
p
=
new
Proxy
(
item
,
reuseInterval
);
proxyQueue
.
add
(
p
);
proxyQueue
.
add
(
p
);
allProxy
.
put
(
s
[
0
],
p
);
allProxy
.
put
(
s
[
0
],
p
);
...
@@ -173,7 +223,7 @@ public class ProxyPool {
...
@@ -173,7 +223,7 @@ public class ProxyPool {
p
.
successNumIncrement
(
1
);
p
.
successNumIncrement
(
1
);
break
;
break
;
case
Proxy
.
ERROR_403
:
case
Proxy
.
ERROR_403
:
// banned,try l
ar
ger interval
// banned,try l
on
ger interval
p
.
fail
(
Proxy
.
ERROR_403
);
p
.
fail
(
Proxy
.
ERROR_403
);
p
.
setReuseTimeInterval
(
reuseInterval
*
p
.
getFailedNum
());
p
.
setReuseTimeInterval
(
reuseInterval
*
p
.
getFailedNum
());
logger
.
info
(
host
+
" >>>> reuseTimeInterval is >>>> "
+
p
.
getReuseTimeInterval
()
/
1000.0
);
logger
.
info
(
host
+
" >>>> reuseTimeInterval is >>>> "
+
p
.
getReuseTimeInterval
()
/
1000.0
);
...
@@ -185,7 +235,7 @@ public class ProxyPool {
...
@@ -185,7 +235,7 @@ public class ProxyPool {
logger
.
info
(
host
+
" >>>> reuseTimeInterval is >>>> "
+
p
.
getReuseTimeInterval
()
/
1000.0
);
logger
.
info
(
host
+
" >>>> reuseTimeInterval is >>>> "
+
p
.
getReuseTimeInterval
()
/
1000.0
);
break
;
break
;
case
Proxy
.
ERROR_404
:
case
Proxy
.
ERROR_404
:
//p.fail(Proxy.ERROR_404);
//
p.fail(Proxy.ERROR_404);
// p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
// p.setReuseTimeInterval(reuseInterval * p.getFailedNum());
break
;
break
;
default
:
default
:
...
@@ -193,14 +243,12 @@ public class ProxyPool {
...
@@ -193,14 +243,12 @@ public class ProxyPool {
break
;
break
;
}
}
if
(
p
.
getFailedNum
()
>
20
)
{
if
(
p
.
getFailedNum
()
>
20
)
{
// allProxy.remove(host.getAddress().getHostAddress());
p
.
setReuseTimeInterval
(
reviveTime
);
p
.
setReuseTimeInterval
(
reviveTime
);
logger
.
error
(
"remove proxy >>>> "
+
host
+
">>>>"
+
p
.
getFailedType
()
+
" >>>> remain proxy >>>> "
+
proxyQueue
.
size
());
logger
.
error
(
"remove proxy >>>> "
+
host
+
">>>>"
+
p
.
getFailedType
()
+
" >>>> remain proxy >>>> "
+
proxyQueue
.
size
());
return
;
return
;
}
}
if
(
p
.
getFailedNum
()%
5
==
0
)
{
if
(
p
.
getFailedNum
()
>
0
&&
p
.
getFailedNum
()
%
5
==
0
)
{
if
(!
ProxyUtil
.
validateProxy
(
host
))
{
if
(!
ProxyUtils
.
validateProxy
(
host
))
{
// allProxy.remove(host.getAddress().getHostAddress());
p
.
setReuseTimeInterval
(
reviveTime
);
p
.
setReuseTimeInterval
(
reviveTime
);
logger
.
error
(
"remove proxy >>>> "
+
host
+
">>>>"
+
p
.
getFailedType
()
+
" >>>> remain proxy >>>> "
+
proxyQueue
.
size
());
logger
.
error
(
"remove proxy >>>> "
+
host
+
">>>>"
+
p
.
getFailedType
()
+
" >>>> remain proxy >>>> "
+
proxyQueue
.
size
());
return
;
return
;
...
@@ -219,7 +267,6 @@ public class ProxyPool {
...
@@ -219,7 +267,6 @@ public class ProxyPool {
re
+=
entry
.
getValue
().
toString
()
+
"\n"
;
re
+=
entry
.
getValue
().
toString
()
+
"\n"
;
}
}
return
re
;
return
re
;
}
}
public
int
getIdleNum
()
{
public
int
getIdleNum
()
{
...
@@ -234,57 +281,44 @@ public class ProxyPool {
...
@@ -234,57 +281,44 @@ public class ProxyPool {
this
.
reuseInterval
=
reuseInterval
;
this
.
reuseInterval
=
reuseInterval
;
}
}
public
static
List
<
String
[]>
getProxyList
()
{
public
void
enable
(
boolean
isEnable
)
{
List
<
String
[]>
proxyList
=
new
ArrayList
<
String
[]>();
this
.
isEnable
=
isEnable
;
BufferedReader
br
=
null
;
}
try
{
br
=
new
BufferedReader
(
new
FileReader
(
new
File
(
"proxy.txt"
)));
String
line
=
""
;
public
boolean
isEnable
()
{
while
((
line
=
br
.
readLine
())
!=
null
)
{
return
isEnable
;
proxyList
.
add
(
new
String
[]
{
line
.
split
(
":"
)[
0
],
line
.
split
(
":"
)[
1
]
});
}
}
catch
(
FileNotFoundException
e
)
{
e
.
printStackTrace
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
return
proxyList
;
}
}
public
static
void
main
(
String
[]
args
)
throws
IOException
{
public
int
getReviveTime
()
{
ProxyPool
proxyPool
=
new
ProxyPool
(
getProxyList
());
return
reviveTime
;
proxyPool
.
setReuseInterval
(
10000
);
}
// proxyPool.saveProxyList();
while
(
true
)
{
List
<
HttpHost
>
httphostList
=
new
ArrayList
<
HttpHost
>();
System
.
in
.
read
();
int
i
=
0
;
while
(
proxyPool
.
getIdleNum
()
>
2
)
{
HttpHost
httphost
=
proxyPool
.
getProxy
();
httphostList
.
add
(
httphost
);
// proxyPool.proxyPool.use(httphost);
proxyPool
.
logger
.
info
(
"borrow object>>>>"
+
i
+
">>>>"
+
httphostList
.
get
(
i
).
toString
());
i
++;
}
System
.
out
.
println
(
proxyPool
.
allProxyStatus
());
System
.
in
.
read
();
for
(
i
=
0
;
i
<
httphostList
.
size
();
i
++)
{
proxyPool
.
returnProxy
(
httphostList
.
get
(
i
),
200
);
proxyPool
.
logger
.
info
(
"return object>>>>"
+
i
+
">>>>"
+
httphostList
.
get
(
i
).
toString
());
}
System
.
out
.
println
(
proxyPool
.
allProxyStatus
());
System
.
in
.
read
();
}
public
void
setReviveTime
(
int
reviveTime
)
{
this
.
reviveTime
=
reviveTime
;
}
}
public
void
enable
(
boolean
isEnable
)
{
public
boolean
isValidateWhenInit
(
)
{
this
.
isEnable
=
isEnable
;
return
validateWhenInit
;
}
}
public
boolean
isEnable
()
{
public
void
validateWhenInit
(
boolean
validateWhenInit
)
{
return
isEnable
;
this
.
validateWhenInit
=
validateWhenInit
;
}
public
int
getSaveProxyInterval
()
{
return
saveProxyInterval
;
}
}
public
void
setSaveProxyInterval
(
int
saveProxyInterval
)
{
this
.
saveProxyInterval
=
saveProxyInterval
;
}
public
String
getProxyFilePath
()
{
return
proxyFilePath
;
}
public
void
setProxyFilePath
(
String
proxyFilePath
)
{
this
.
proxyFilePath
=
proxyFilePath
;
}
}
}
webmagic-core/src/main/java/us/codecraft/webmagic/
proxy/ProxyUtil
.java
→
webmagic-core/src/main/java/us/codecraft/webmagic/
utils/ProxyUtils
.java
View file @
2f89cfc3
package
us
.
codecraft
.
webmagic
.
proxy
;
package
us
.
codecraft
.
webmagic
.
utils
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Inet6Address
;
import
java.net.Inet6Address
;
...
@@ -7,36 +7,54 @@ import java.net.InetSocketAddress;
...
@@ -7,36 +7,54 @@ import java.net.InetSocketAddress;
import
java.net.NetworkInterface
;
import
java.net.NetworkInterface
;
import
java.net.Socket
;
import
java.net.Socket
;
import
java.net.SocketException
;
import
java.net.SocketException
;
import
java.net.UnknownHostException
;
import
java.util.Enumeration
;
import
java.util.Enumeration
;
import
java.util.regex.Pattern
;
import
org.apache.http.HttpHost
;
import
org.apache.http.HttpHost
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
/**
/**
*
ClassName:ProxyUtil
*
Pooled Proxy Object
*
*
* @see
* @author yxssfxwzy@sina.com <br>
* @author ch
* @since 0.5.1
* @version Ver 1.0
* @Date 2014-2-16 下午04:20:07
*/
*/
public
class
ProxyUtil
{
// TODO 改为单例
public
class
ProxyUtils
{
private
static
InetAddress
localAddr
;
private
static
InetAddress
localAddr
;
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
ProxyUtil
.
class
);
private
static
String
networkInterface
=
"eth7"
;
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
ProxyUtils
.
class
);
static
{
static
{
init
();
init
();
}
}
private
static
void
init
()
{
private
static
void
init
()
{
// first way to get local IP
try
{
localAddr
=
InetAddress
.
getLocalHost
();
logger
.
info
(
"local IP:"
+
localAddr
.
getHostAddress
());
}
catch
(
UnknownHostException
e
)
{
logger
.
info
(
"try again\n"
);
}
if
(
localAddr
!=
null
)
{
return
;
}
// other way to get local IP
Enumeration
<
InetAddress
>
localAddrs
;
Enumeration
<
InetAddress
>
localAddrs
;
try
{
try
{
NetworkInterface
ni
=
NetworkInterface
.
getByName
(
"eth7"
);
// modify your network interface name
NetworkInterface
ni
=
NetworkInterface
.
getByName
(
networkInterface
);
if
(
ni
==
null
)
{
if
(
ni
==
null
)
{
logger
.
error
(
"choose NetworkInterface\n"
+
getNetworkInterface
())
;
return
;
}
}
localAddrs
=
ni
.
getInetAddresses
();
localAddrs
=
ni
.
getInetAddresses
();
if
(
localAddrs
==
null
||
!
localAddrs
.
hasMoreElements
())
{
logger
.
error
(
"choose NetworkInterface\n"
+
getNetworkInterface
());
return
;
}
while
(
localAddrs
.
hasMoreElements
())
{
while
(
localAddrs
.
hasMoreElements
())
{
InetAddress
tmp
=
localAddrs
.
nextElement
();
InetAddress
tmp
=
localAddrs
.
nextElement
();
if
(!
tmp
.
isLoopbackAddress
()
&&
!
tmp
.
isLinkLocalAddress
()
&&
!(
tmp
instanceof
Inet6Address
))
{
if
(!
tmp
.
isLoopbackAddress
()
&&
!
tmp
.
isLinkLocalAddress
()
&&
!(
tmp
instanceof
Inet6Address
))
{
...
@@ -49,12 +67,11 @@ public class ProxyUtil {
...
@@ -49,12 +67,11 @@ public class ProxyUtil {
logger
.
error
(
"Failure when init ProxyUtil"
,
e
);
logger
.
error
(
"Failure when init ProxyUtil"
,
e
);
logger
.
error
(
"choose NetworkInterface\n"
+
getNetworkInterface
());
logger
.
error
(
"choose NetworkInterface\n"
+
getNetworkInterface
());
}
}
}
}
public
static
boolean
validateProxy
(
HttpHost
p
)
{
public
static
boolean
validateProxy
(
HttpHost
p
)
{
if
(
localAddr
==
null
)
{
if
(
localAddr
==
null
)
{
logger
.
error
(
"cannot get local
ip
"
);
logger
.
error
(
"cannot get local
IP
"
);
return
false
;
return
false
;
}
}
boolean
isReachable
=
false
;
boolean
isReachable
=
false
;
...
@@ -81,7 +98,8 @@ public class ProxyUtil {
...
@@ -81,7 +98,8 @@ public class ProxyUtil {
}
}
private
static
String
getNetworkInterface
()
{
private
static
String
getNetworkInterface
()
{
String
networkInterfaceName
=
""
;
String
networkInterfaceName
=
">>>> modify networkInterface in us.codecraft.webmagic.utils.ProxyUtils"
;
Enumeration
<
NetworkInterface
>
enumeration
=
null
;
Enumeration
<
NetworkInterface
>
enumeration
=
null
;
try
{
try
{
enumeration
=
NetworkInterface
.
getNetworkInterfaces
();
enumeration
=
NetworkInterface
.
getNetworkInterfaces
();
...
@@ -90,10 +108,14 @@ public class ProxyUtil {
...
@@ -90,10 +108,14 @@ public class ProxyUtil {
}
}
while
(
enumeration
.
hasMoreElements
())
{
while
(
enumeration
.
hasMoreElements
())
{
NetworkInterface
networkInterface
=
enumeration
.
nextElement
();
NetworkInterface
networkInterface
=
enumeration
.
nextElement
();
networkInterfaceName
+=
networkInterface
.
toString
()
+
'\n'
;
Enumeration
<
InetAddress
>
addr
=
networkInterface
.
getInetAddresses
();
Enumeration
<
InetAddress
>
addr
=
networkInterface
.
getInetAddresses
();
while
(
addr
.
hasMoreElements
())
{
while
(
addr
.
hasMoreElements
())
{
networkInterfaceName
+=
"\tip:"
+
addr
.
nextElement
().
getHostAddress
()
+
"\n"
;
String
s
=
addr
.
nextElement
().
getHostAddress
();
Pattern
IPV4_PATTERN
=
Pattern
.
compile
(
"^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$"
);
if
(
s
!=
null
&&
IPV4_PATTERN
.
matcher
(
s
).
matches
())
{
networkInterfaceName
+=
networkInterface
.
toString
()
+
"IP:"
+
s
+
"\n\n"
;
}
}
}
}
}
return
networkInterfaceName
;
return
networkInterfaceName
;
...
...
webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
0 → 100644
View file @
2f89cfc3
package
us
.
codecraft
.
webmagic
.
proxy
;
import
static
org
.
assertj
.
core
.
api
.
Assertions
.
assertThat
;
import
java.io.File
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.apache.http.HttpHost
;
import
org.junit.BeforeClass
;
import
org.junit.Test
;
import
us.codecraft.webmagic.Request
;
/**
* @author yxssfxwzy@sina.com May 30, 2014
*
*/
/**
 * Exercises the borrow/return cycle of {@link ProxyPool} with four dummy
 * proxies.
 */
public class ProxyTest {

    /** Proxy definitions handed to the pool, each element being {host, port}. */
    private static List<String[]> httpProxyList = new ArrayList<String[]>();

    /** Splits the "host:port" fixtures into the {host, port} pairs the pool expects. */
    @BeforeClass
    public static void before() {
        String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0", "0.0.0.4:0" };
        for (String line : source) {
            String[] hostAndPort = line.split(":");
            httpProxyList.add(new String[] { hostAndPort[0], hostAndPort[1] });
        }
    }

    @Test
    public void testAddProxy() {
        // Placeholder kept from the original commit; no assertions yet.
    }

    /**
     * Borrows every idle proxy, hands each one to a {@link Fetch} thread, waits
     * for the threads to finish and returns the proxies as successful. The
     * cycle runs twice.
     */
    @Test
    public void testProxy() throws InterruptedException {
        ProxyPool proxyPool = new ProxyPool(httpProxyList);
        proxyPool.setReuseInterval(500);
        assertThat(proxyPool.getIdleNum()).isEqualTo(4);
        assertThat(new File(proxyPool.getProxyFilePath()).exists()).isTrue();
        for (int i = 0; i < 2; i++) {
            List<Fetch> fetchList = new ArrayList<Fetch>();
            while (proxyPool.getIdleNum() != 0) {
                HttpHost httphost = proxyPool.getProxy();
                System.out.println(httphost.getHostName() + ":" + httphost.getPort());
                Fetch tmp = new Fetch(httphost);
                tmp.start();
                fetchList.add(tmp);
            }
            for (Fetch fetch : fetchList) {
                // Wait for the simulated fetch to finish before giving the
                // proxy back; the original returned it while the thread could
                // still be running.
                fetch.join();
                proxyPool.returnProxy(fetch.hp, Proxy.SUCCESS);
            }
            System.out.println(proxyPool.allProxyStatus());
        }
    }

    /** Simulates a page fetch through the given proxy by sleeping for 500 ms. */
    static class Fetch extends Thread {

        final HttpHost hp;

        public Fetch(HttpHost hp) {
            this.hp = hp;
        }

        @Override
        public void run() {
            try {
                System.out.println("fetch web page use proxy: " + hp.getHostName() + ":" + hp.getPort());
                sleep(500);
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of swallowing it.
                Thread.currentThread().interrupt();
            }
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment