Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
19474e47
Commit
19474e47
authored
Feb 28, 2015
by
edwardsbean
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add SimpleProxyPool and IProxyPool
parent
05a1f395
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
136 additions
and
12 deletions
+136
-12
Site.java
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+7
-11
IProxyPool.java
...src/main/java/us/codecraft/webmagic/proxy/IProxyPool.java
+12
-0
ProxyPool.java
.../src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java
+1
-1
SimpleProxyPool.java
...ain/java/us/codecraft/webmagic/proxy/SimpleProxyPool.java
+116
-0
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
View file @
19474e47
...
...
@@ -4,7 +4,8 @@ import com.google.common.collect.HashBasedTable;
import
com.google.common.collect.Table
;
import
org.apache.http.HttpHost
;
import
us.codecraft.webmagic.proxy.ProxyPool
;
import
us.codecraft.webmagic.proxy.IProxyPool
;
import
us.codecraft.webmagic.proxy.SimpleProxyPool
;
import
us.codecraft.webmagic.utils.UrlUtils
;
import
java.util.*
;
...
...
@@ -51,7 +52,7 @@ public class Site {
private
HttpHost
httpProxy
;
private
ProxyPool
httpProxyPool
;
private
I
ProxyPool
httpProxyPool
;
private
boolean
useGzip
=
true
;
...
...
@@ -464,17 +465,17 @@ public class Site {
*
* @return this
*/
public
Site
setHttpProxyPool
(
List
<
String
[]>
httpProxyList
)
{
this
.
httpProxyPool
=
new
ProxyPool
(
httpProxyList
)
;
public
Site
setHttpProxyPool
(
IProxyPool
proxyPool
)
{
this
.
httpProxyPool
=
proxyPool
;
return
this
;
}
public
Site
enableHttpProxyPool
()
{
this
.
httpProxyPool
=
new
ProxyPool
();
this
.
httpProxyPool
=
new
Simple
ProxyPool
();
return
this
;
}
public
ProxyPool
getHttpProxyPool
()
{
public
I
ProxyPool
getHttpProxyPool
()
{
return
httpProxyPool
;
}
...
...
@@ -486,9 +487,4 @@ public class Site {
httpProxyPool
.
returnProxy
(
proxy
,
statusCode
);
}
public
Site
setProxyReuseInterval
(
int
reuseInterval
)
{
this
.
httpProxyPool
.
setReuseInterval
(
reuseInterval
);
return
this
;
}
}
webmagic-core/src/main/java/us/codecraft/webmagic/proxy/IProxyPool.java
0 → 100644
View file @
19474e47
package
us
.
codecraft
.
webmagic
.
proxy
;
import
org.apache.http.HttpHost
;
/**
 * Abstraction over a pool of HTTP proxies, so that different pool
 * implementations can be plugged in behind one type.
 *
 * Created by edwardsbean on 15-2-28.
 */
public interface IProxyPool {

    /**
     * Hands out a proxy from the pool.
     *
     * @return the proxy host to use
     */
    HttpHost getProxy();

    /**
     * Gives a proxy back to the pool together with the outcome of the
     * request that used it.
     *
     * @param host       the proxy being returned
     * @param statusCode outcome code of the request made through {@code host};
     *                   the meaning of each value is defined by the implementation
     */
    void returnProxy(HttpHost host, int statusCode);

    /**
     * Reports whether this pool is enabled.
     *
     * @return {@code true} if the pool is enabled
     */
    boolean isEnable();
}
webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyPool.java
View file @
19474e47
...
...
@@ -22,7 +22,7 @@ import java.util.concurrent.DelayQueue;
* @see Proxy
* @since 0.5.1
*/
public
class
ProxyPool
{
public
class
ProxyPool
implements
IProxyPool
{
private
Logger
logger
=
LoggerFactory
.
getLogger
(
getClass
());
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyPool.java
0 → 100644
View file @
19474e47
package
us
.
codecraft
.
webmagic
.
proxy
;
import
org.apache.http.HttpHost
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.net.InetAddress
;
import
java.net.UnknownHostException
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.NoSuchElementException
;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.ConcurrentHashMap
;
import
java.util.concurrent.DelayQueue
;
/**
 * {@link IProxyPool} implementation that keeps its proxies in a
 * {@link DelayQueue}: {@link #getProxy()} takes a proxy out of the queue and
 * {@link #returnProxy(HttpHost, int)} records the request outcome and puts the
 * proxy back, unless it has failed too many times in a row.
 *
 * Every registered proxy is also tracked in a map keyed by the textual IP
 * address of its resolved host, which is the same key {@code returnProxy}
 * derives from the {@link HttpHost} it is given.
 *
 * Created by edwardsbean on 15-2-28.
 */
public class SimpleProxyPool implements IProxyPool {

    /** Number of recorded failures above which a returned proxy is no longer re-queued. */
    private static final int MAX_FAILED_NUM = 3;

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Proxies currently available for hand-out. */
    private final BlockingQueue<Proxy> proxyQueue = new DelayQueue<Proxy>();

    /** Every proxy ever registered, keyed by the resolved host's IP address string. */
    private final Map<String, Proxy> allProxy = new ConcurrentHashMap<String, Proxy>();

    private boolean isEnable = false;
    private int reuseInterval = 1500;// ms
    // NOTE(review): not read anywhere in this class; kept for parity with the field set of ProxyPool.
    private int reviveTime = 2 * 60 * 60 * 1000;// ms

    /**
     * Creates an empty pool. The pool stays disabled until
     * {@link #addProxy(String[]...)} is called.
     */
    public SimpleProxyPool() {
        this(null);
    }

    /**
     * Creates a pool pre-populated with the given proxies.
     *
     * @param httpProxyList proxy definitions, each a {@code {host, port}} pair;
     *                      {@code null} creates an empty, disabled pool
     */
    public SimpleProxyPool(List<String[]> httpProxyList) {
        if (httpProxyList != null) {
            addProxy(httpProxyList.toArray(new String[httpProxyList.size()][]));
        }
    }

    /**
     * Registers proxies and enables the pool.
     *
     * Each entry must hold the host (name or IP literal) at index 0 and the
     * port at index 1. Entries that are malformed, cannot be resolved, or
     * resolve to an address that is already registered are skipped; the
     * remaining entries are still processed.
     *
     * @param httpProxyList proxy definitions as {@code {host, port}} pairs
     */
    public void addProxy(String[]... httpProxyList) {
        isEnable = true;
        if (httpProxyList == null) {
            logger.info("proxy pool size>>>>" + allProxy.size());
            return;
        }
        for (String[] s : httpProxyList) {
            if (s == null || s.length < 2) {
                // A malformed entry must not abort registration of the valid ones.
                logger.error("HttpHost init error: proxy entry must be {host, port}");
                continue;
            }
            try {
                InetAddress address = InetAddress.getByName(s[0]);
                // Key by the resolved IP string, because returnProxy() looks proxies up via
                // host.getAddress().getHostAddress(). Keying by the raw input would make
                // proxies configured by hostname impossible to find on return.
                String key = address.getHostAddress();
                if (allProxy.containsKey(key)) {
                    continue;
                }
                HttpHost item = new HttpHost(address, Integer.valueOf(s[1]));
                Proxy p = new Proxy(item, reuseInterval);
                proxyQueue.add(p);
                allProxy.put(key, p);
            } catch (NumberFormatException e) {
                logger.error("HttpHost init error:", e);
            } catch (UnknownHostException e) {
                logger.error("HttpHost init error:", e);
            }
        }
        logger.info("proxy pool size>>>>" + allProxy.size());
    }

    /**
     * Records the outcome of a request made through {@code host} and puts the
     * proxy back into the queue.
     *
     * Hosts that are {@code null}, carry no resolved address, or are not
     * registered in this pool are ignored. A proxy whose failure count exceeds
     * {@value #MAX_FAILED_NUM} after the update is not re-queued.
     *
     * @param host       the proxy that was used
     * @param statusCode one of the {@code Proxy} status constants; any other
     *                   value is recorded as a failure of that type
     */
    @Override
    public void returnProxy(HttpHost host, int statusCode) {
        if (host == null || host.getAddress() == null) {
            return;
        }
        Proxy p = allProxy.get(host.getAddress().getHostAddress());
        if (p == null) {
            return;
        }
        switch (statusCode) {
            case Proxy.SUCCESS:
                p.setFailedNum(0);
                p.setFailedErrorType(new ArrayList<Integer>());
                p.recordResponse();
                p.successNumIncrement(1);
                break;
            case Proxy.ERROR_403:
                // banned,try longer interval
                p.fail(Proxy.ERROR_403);
                break;
            case Proxy.ERROR_BANNED:
                p.fail(Proxy.ERROR_BANNED);
                logger.warn("this proxy is banned >>>> " + p.getHttpHost());
                break;
            case Proxy.ERROR_404:
                // Deliberately not counted as a proxy failure.
                break;
            default:
                p.fail(statusCode);
                break;
        }
        if (p.getFailedNum() > MAX_FAILED_NUM) {
            // The proxy is only dropped from the queue; it stays in allProxy.
            logger.error("remove proxy >>>> " + host + ">>>>" + p.getFailedType()
                    + " >>>> remain proxy >>>> " + proxyQueue.size());
            return;
        }
        try {
            proxyQueue.put(p);
        } catch (InterruptedException e) {
            // Restore the flag so callers further up the stack can observe the interruption.
            Thread.currentThread().interrupt();
            logger.warn("proxyQueue return proxy error", e);
        }
    }

    /**
     * Takes the next available proxy, blocking until one can be taken from
     * the queue.
     *
     * @return the host of the proxy taken from the queue
     * @throws NoSuchElementException if the calling thread is interrupted
     *                                while waiting; the interrupt flag is
     *                                left set in that case
     */
    @Override
    public HttpHost getProxy() {
        Proxy proxy = null;
        try {
            proxy = proxyQueue.take();
        } catch (InterruptedException e) {
            // Restore the flag so callers further up the stack can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("get proxy error", e);
        }
        if (proxy == null) {
            throw new NoSuchElementException();
        }
        return proxy.getHttpHost();
    }

    /**
     * Reports whether proxies have been added to this pool.
     *
     * @return {@code true} once {@link #addProxy(String[]...)} has been called
     */
    @Override
    public boolean isEnable() {
        return isEnable;
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment