Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
1cfbd13a
Commit
1cfbd13a
authored
Apr 08, 2017
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactor in httpclientdownloader
parent
83ada974
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
47 deletions
+77
-47
HttpClientDownloader.java
...s/codecraft/webmagic/downloader/HttpClientDownloader.java
+5
-45
HttpClientRequestContext.java
...decraft/webmagic/downloader/HttpClientRequestContext.java
+33
-0
HttpUriRequestConverter.java
...odecraft/webmagic/downloader/HttpUriRequestConverter.java
+39
-2
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
View file @
1cfbd13a
package
us
.
codecraft
.
webmagic
.
downloader
;
package
us
.
codecraft
.
webmagic
.
downloader
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.io.IOUtils
;
import
org.apache.commons.io.IOUtils
;
import
org.apache.http.Header
;
import
org.apache.http.HttpHost
;
import
org.apache.http.HttpResponse
;
import
org.apache.http.HttpResponse
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.apache.http.auth.AuthState
;
import
org.apache.http.auth.UsernamePasswordCredentials
;
import
org.apache.http.client.CookieStore
;
import
org.apache.http.client.config.CookieSpecs
;
import
org.apache.http.client.config.RequestConfig
;
import
org.apache.http.client.entity.UrlEncodedFormEntity
;
import
org.apache.http.client.methods.CloseableHttpResponse
;
import
org.apache.http.client.methods.CloseableHttpResponse
;
import
org.apache.http.client.methods.HttpUriRequest
;
import
org.apache.http.client.methods.RequestBuilder
;
import
org.apache.http.client.protocol.HttpClientContext
;
import
org.apache.http.cookie.Cookie
;
import
org.apache.http.impl.auth.BasicScheme
;
import
org.apache.http.impl.client.BasicCookieStore
;
import
org.apache.http.impl.client.CloseableHttpClient
;
import
org.apache.http.impl.client.CloseableHttpClient
;
import
org.apache.http.message.BasicNameValuePair
;
import
org.apache.http.protocol.BasicHttpContext
;
import
org.apache.http.protocol.HttpContext
;
import
org.apache.http.util.EntityUtils
;
import
org.apache.http.util.EntityUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -30,16 +12,15 @@ import us.codecraft.webmagic.Page;
...
@@ -30,16 +12,15 @@ import us.codecraft.webmagic.Page;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.proxy.Proxy
;
import
us.codecraft.webmagic.proxy.ProxyProvider
;
import
us.codecraft.webmagic.proxy.ProxyProvider
;
import
us.codecraft.webmagic.selector.PlainText
;
import
us.codecraft.webmagic.selector.PlainText
;
import
us.codecraft.webmagic.utils.CharsetUtils
;
import
us.codecraft.webmagic.utils.CharsetUtils
;
import
us.codecraft.webmagic.utils.HttpClientUtils
;
import
us.codecraft.webmagic.utils.HttpClientUtils
;
import
us.codecraft.webmagic.utils.HttpConstant
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.nio.charset.Charset
;
import
java.nio.charset.Charset
;
import
java.util.*
;
import
java.util.HashMap
;
import
java.util.Map
;
/**
/**
...
@@ -96,33 +77,12 @@ public class HttpClientDownloader extends AbstractDownloader {
...
@@ -96,33 +77,12 @@ public class HttpClientDownloader extends AbstractDownloader {
}
}
logger
.
debug
(
"downloading page {}"
,
request
.
getUrl
());
logger
.
debug
(
"downloading page {}"
,
request
.
getUrl
());
CloseableHttpResponse
httpResponse
=
null
;
CloseableHttpResponse
httpResponse
=
null
;
int
statusCode
=
0
;
Site
site
=
task
.
getSite
();
Site
site
=
task
.
getSite
();
Proxy
proxy
=
null
;
HttpClientContext
httpContext
=
new
HttpClientContext
();
if
(
proxyProvider
!=
null
)
{
proxy
=
proxyProvider
.
getProxy
(
task
);
AuthState
authState
=
new
AuthState
();
authState
.
update
(
new
BasicScheme
(),
new
UsernamePasswordCredentials
(
proxy
.
getUsername
(),
proxy
.
getPassword
()));
httpContext
.
setAttribute
(
HttpClientContext
.
PROXY_AUTH_STATE
,
authState
);
}
CloseableHttpClient
httpClient
=
getHttpClient
(
site
);
CloseableHttpClient
httpClient
=
getHttpClient
(
site
);
HttpUriRequest
httpUriRequest
=
httpUriRequestConverter
.
convert
(
request
,
site
,
proxy
);
HttpClientRequestContext
requestContext
=
httpUriRequestConverter
.
convert
(
request
,
site
,
proxyProvider
!=
null
?
proxyProvider
.
getProxy
(
task
)
:
null
);
if
(
request
.
getCookies
()
!=
null
&&
CollectionUtils
.
isNotEmpty
(
request
.
getCookies
()))
{
CookieStore
cookieStore
=
new
BasicCookieStore
();
for
(
Cookie
c
:
request
.
getCookies
())
{
cookieStore
.
addCookie
(
c
);
}
httpContext
.
setCookieStore
(
cookieStore
);
}
if
(
request
.
getHeaders
()
!=
null
&&
CollectionUtils
.
isNotEmpty
(
request
.
getHeaders
()))
{
for
(
Header
h
:
request
.
getHeaders
())
{
httpUriRequest
.
setHeader
(
h
);
}
}
try
{
try
{
httpResponse
=
httpClient
.
execute
(
httpUriRequest
,
httpContext
);
httpResponse
=
httpClient
.
execute
(
requestContext
.
getHttpUriRequest
(),
requestContext
.
getHttpClientContext
()
);
statusCode
=
httpResponse
.
getStatusLine
().
getStatusCode
();
int
statusCode
=
httpResponse
.
getStatusLine
().
getStatusCode
();
if
(
site
.
getAcceptStatCode
().
contains
(
statusCode
))
{
if
(
site
.
getAcceptStatCode
().
contains
(
statusCode
))
{
Page
page
=
handleResponse
(
request
,
site
.
getCharset
(),
httpResponse
,
task
);
Page
page
=
handleResponse
(
request
,
site
.
getCharset
(),
httpResponse
,
task
);
onSuccess
(
request
);
onSuccess
(
request
);
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientRequestContext.java
0 → 100644
View file @
1cfbd13a
package
us
.
codecraft
.
webmagic
.
downloader
;
import
org.apache.http.client.methods.HttpUriRequest
;
import
org.apache.http.client.protocol.HttpClientContext
;
/**
 * Simple mutable holder that pairs the {@link HttpUriRequest} to execute with
 * the {@link HttpClientContext} it should be executed in.
 *
 * <p>Both fields start out as {@code null} and are populated through the setters.
 *
 * @author code4crafter@gmail.com
 *         Date: 17/4/8
 *         Time: 19:43
 */
public class HttpClientRequestContext {

    /** The request to send; {@code null} until set. */
    private HttpUriRequest httpUriRequest;

    /** The execution context accompanying the request; {@code null} until set. */
    private HttpClientContext httpClientContext;

    /**
     * @return the request currently stored in this holder
     */
    public HttpUriRequest getHttpUriRequest() {
        return this.httpUriRequest;
    }

    /**
     * @return the execution context currently stored in this holder
     */
    public HttpClientContext getHttpClientContext() {
        return this.httpClientContext;
    }

    /**
     * Stores the request to send.
     *
     * @param httpUriRequest the request to keep
     */
    public void setHttpUriRequest(HttpUriRequest httpUriRequest) {
        this.httpUriRequest = httpUriRequest;
    }

    /**
     * Stores the execution context to use.
     *
     * @param httpClientContext the context to keep
     */
    public void setHttpClientContext(HttpClientContext httpClientContext) {
        this.httpClientContext = httpClientContext;
    }
}
webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
View file @
1cfbd13a
package
us
.
codecraft
.
webmagic
.
downloader
;
package
us
.
codecraft
.
webmagic
.
downloader
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.http.Header
;
import
org.apache.http.HttpHost
;
import
org.apache.http.HttpHost
;
import
org.apache.http.auth.AuthState
;
import
org.apache.http.auth.UsernamePasswordCredentials
;
import
org.apache.http.client.CookieStore
;
import
org.apache.http.client.config.CookieSpecs
;
import
org.apache.http.client.config.CookieSpecs
;
import
org.apache.http.client.config.RequestConfig
;
import
org.apache.http.client.config.RequestConfig
;
import
org.apache.http.client.methods.HttpUriRequest
;
import
org.apache.http.client.methods.HttpUriRequest
;
import
org.apache.http.client.methods.RequestBuilder
;
import
org.apache.http.client.methods.RequestBuilder
;
import
org.apache.http.client.protocol.HttpClientContext
;
import
org.apache.http.cookie.Cookie
;
import
org.apache.http.entity.ByteArrayEntity
;
import
org.apache.http.entity.ByteArrayEntity
;
import
org.apache.http.impl.auth.BasicScheme
;
import
org.apache.http.impl.client.BasicCookieStore
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.proxy.Proxy
;
import
us.codecraft.webmagic.proxy.Proxy
;
...
@@ -20,7 +29,29 @@ import java.util.Map;
...
@@ -20,7 +29,29 @@ import java.util.Map;
*/
*/
public
class
HttpUriRequestConverter
{
public
class
HttpUriRequestConverter
{
public
HttpUriRequest
convert
(
Request
request
,
Site
site
,
Proxy
proxy
)
{
/**
 * Converts a crawl request into the request/context pair handed to HttpClient.
 *
 * <p>The request is built by {@code convertHttpUriRequest} and the context by
 * {@code convertHttpClientContext}; both receive the same three arguments.
 *
 * @param request the crawl request to translate
 * @param site    the site settings passed through to both helpers
 * @param proxy   the proxy passed through to both helpers
 *                (NOTE(review): the downloader appears to pass {@code null}
 *                when no proxy provider is set - confirm the helpers accept it)
 * @return a new {@link HttpClientRequestContext} holding both results
 */
public HttpClientRequestContext convert(Request request, Site site, Proxy proxy) {
    // Same call order as before: request first, then context.
    HttpUriRequest uriRequest = convertHttpUriRequest(request, site, proxy);
    HttpClientContext clientContext = convertHttpClientContext(request, site, proxy);

    HttpClientRequestContext result = new HttpClientRequestContext();
    result.setHttpUriRequest(uriRequest);
    result.setHttpClientContext(clientContext);
    return result;
}
/**
 * Builds the {@link HttpClientContext} for a single request.
 *
 * <p>When a proxy with a user name is supplied, the context is seeded with a
 * proxy {@link AuthState} using basic authentication and the proxy's
 * credentials. When the request carries cookies, they are copied into a fresh
 * {@link BasicCookieStore} attached to the context.
 *
 * @param request the crawl request whose cookies are copied
 * @param site    currently unused; kept so the signature matches the sibling converter
 * @param proxy   the proxy in use, or {@code null} when the request goes out directly
 * @return a new context, never {@code null}
 */
private HttpClientContext convertHttpClientContext(Request request, Site site, Proxy proxy) {
    HttpClientContext httpContext = new HttpClientContext();

    // The downloader passes a null proxy when no proxy provider is configured,
    // and UsernamePasswordCredentials rejects a null user name, so proxy auth
    // is only set up when there are credentials to send.
    if (proxy != null && proxy.getUsername() != null) {
        AuthState authState = new AuthState();
        authState.update(new BasicScheme(),
                new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
        httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState);
    }

    // CollectionUtils.isNotEmpty is null-safe, so no separate null check is needed.
    if (CollectionUtils.isNotEmpty(request.getCookies())) {
        CookieStore cookieStore = new BasicCookieStore();
        for (Cookie c : request.getCookies()) {
            cookieStore.addCookie(c);
        }
        httpContext.setCookieStore(cookieStore);
    }
    return httpContext;
}
private
HttpUriRequest
convertHttpUriRequest
(
Request
request
,
Site
site
,
Proxy
proxy
)
{
RequestBuilder
requestBuilder
=
selectRequestMethod
(
request
).
setUri
(
request
.
getUrl
());
RequestBuilder
requestBuilder
=
selectRequestMethod
(
request
).
setUri
(
request
.
getUrl
());
if
(
site
.
getHeaders
()
!=
null
)
{
if
(
site
.
getHeaders
()
!=
null
)
{
for
(
Map
.
Entry
<
String
,
String
>
headerEntry
:
site
.
getHeaders
().
entrySet
())
{
for
(
Map
.
Entry
<
String
,
String
>
headerEntry
:
site
.
getHeaders
().
entrySet
())
{
...
@@ -40,7 +71,13 @@ public class HttpUriRequestConverter {
...
@@ -40,7 +71,13 @@ public class HttpUriRequestConverter {
requestConfigBuilder
.
setProxy
(
new
HttpHost
(
proxy
.
getHost
(),
proxy
.
getPort
()));
requestConfigBuilder
.
setProxy
(
new
HttpHost
(
proxy
.
getHost
(),
proxy
.
getPort
()));
}
}
requestBuilder
.
setConfig
(
requestConfigBuilder
.
build
());
requestBuilder
.
setConfig
(
requestConfigBuilder
.
build
());
return
requestBuilder
.
build
();
HttpUriRequest
httpUriRequest
=
requestBuilder
.
build
();
if
(
request
.
getHeaders
()
!=
null
&&
CollectionUtils
.
isNotEmpty
(
request
.
getHeaders
()))
{
for
(
Header
h
:
request
.
getHeaders
())
{
httpUriRequest
.
setHeader
(
h
);
}
}
return
httpUriRequest
;
}
}
private
RequestBuilder
selectRequestMethod
(
Request
request
)
{
private
RequestBuilder
selectRequestMethod
(
Request
request
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment