Commit fe95a684 authored by yihua.huang's avatar yihua.huang

Request再次重构:去掉params,仅保留HttpRequestBody

parents 74110e6e 395396c6
package us.codecraft.webmagic;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Json;
import us.codecraft.webmagic.selector.Selectable;
import us.codecraft.webmagic.utils.UrlUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
......@@ -46,7 +49,7 @@ public class Page {
private boolean needCycleRetry;
private List<Request> targetRequests = new ArrayList<Request>();
/**
* Creates an empty page. Nothing is assigned here, so every field keeps the
* value given by its declaration.
*/
public Page() {
}
......@@ -232,6 +235,11 @@ public class Page {
", statusCode=" + statusCode +
", needCycleRetry=" + needCycleRetry +
", targetRequests=" + targetRequests +
", headers=" + headers+
'}';
}
}
package us.codecraft.webmagic;
import org.apache.http.Header;
import org.apache.http.cookie.Cookie;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.utils.Experimental;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
......@@ -23,14 +28,19 @@ public class Request implements Serializable {
private String method;
private HttpRequestBody requestBody;
/**
* Store additional information in extras.
*/
private Map<String, Object> extras;
/**
* POST/GET param set
* */
private Map<String,String> params=new HashMap<String, String>();
/**
* Cookies for the current URL; if none are set, the Site's cookies are used.
*/
private List<Cookie> cookies=new ArrayList<Cookie>();
private List<Header> headers=new ArrayList<Header>();
/**
* Priority of the request.<br>
......@@ -109,57 +119,38 @@ public class Request implements Serializable {
this.method = method;
}
/**
* Returns the parameter map held by this request.
* <br>
* The internal map itself is returned (no copy is made), so changes made by
* the caller are visible to this request.
*
* @return the params map; may be null if {@code setParams(null)} was called
*/
public Map<String, String> getParams() {
return params;
}
/**
* Replaces the parameter map of this request with the given map.
* <br>
* Do NOT use this for a request whose URL already carries parameters,
* such as 'https://github.com/search?q=webmagic'.
* <br>
* The map is stored by reference; no copy is made.
*
* @param params the new parameter map
* */
public void setParams(Map<String, String> params) {
this.params = params;
}
/**
* Adds a single parameter to this request, replacing any existing value
* stored under the same key.
* <br>
* Do NOT use this for a request whose URL already carries parameters,
* such as 'https://github.com/search?q=webmagic'.
* <br>
* If the parameter map was previously cleared with {@code setParams(null)},
* a fresh map is created instead of failing with a NullPointerException.
*
* @param key parameter name
* @param value parameter value
* */
public void putParams(String key,String value) {
    // setParams(null) is permitted (equals/hashCode tolerate a null map),
    // so recreate the map lazily before writing to it.
    if (params == null) {
        params = new HashMap<String, String>();
    }
    params.put(key, value);
}
/**
* Two requests are equal when they are of exactly the same class and their
* url, method and params are all equal (null matches only null).
*
* @param o the object to compare with
* @return true if {@code o} is an equal request
*/
@Override
public boolean equals(Object o) {
    // Identity shortcut.
    if (o == this) {
        return true;
    }
    // Only an instance of exactly the same class can be equal.
    if (o == null || o.getClass() != getClass()) {
        return false;
    }
    Request other = (Request) o;

    boolean sameUrl = (url == null) ? other.url == null : url.equals(other.url);
    if (!sameUrl) {
        return false;
    }

    boolean sameMethod = (method == null) ? other.method == null : method.equals(other.method);
    if (!sameMethod) {
        return false;
    }

    if (params == null) {
        return other.params == null;
    }
    return params.equals(other.params);
}
/**
* Computes a hash code from url, method and params, which are exactly the
* fields compared by {@link #equals(Object)}.
* <br>
* Headers and cookies are deliberately left out: equals ignores them, so
* including them would let two equal requests produce different hash codes
* and break hash-based collections such as HashSet and HashMap.
*
* @return hash code consistent with equals
*/
@Override
public int hashCode() {
    int result = url != null ? url.hashCode() : 0;
    result = 31 * result + (method != null ? method.hashCode() : 0);
    result = 31 * result + (params != null ? params.hashCode() : 0);
    return result;
}
/**
* Returns the cookie list held by this request.
* <br>
* The internal list itself is returned (no copy is made).
*
* @return the cookies of this request
*/
public List<Cookie> getCookies() {
return cookies;
}
/**
* Returns the header list held by this request.
* <br>
* The internal list itself is returned (no copy is made).
*
* @return the headers of this request
*/
public List<Header> getHeaders() {
return headers;
}
/**
* Returns the body attached to this request.
*
* @return the request body, or null when none has been assigned (the field
*         has no initializer)
*/
public HttpRequestBody getRequestBody() {
return requestBody;
}
/**
* Builds a one-line description of this request listing url, method,
* extras, params, priority, headers and cookies.
*
* @return text of the form {@code Request{url='...', method='...', ...}}
*/
@Override
public String toString() {
    StringBuilder text = new StringBuilder("Request{");
    text.append("url='").append(url).append('\'');
    text.append(", method='").append(method).append('\'');
    text.append(", extras=").append(extras);
    text.append(", params=").append(params);
    text.append(", priority=").append(priority);
    text.append(", headers=").append(headers);
    text.append(", cookies=").append(cookies);
    text.append('}');
    return text.toString();
}
}
package us.codecraft.webmagic.downloader;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.annotation.ThreadSafe;
import org.apache.http.auth.AuthState;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
......@@ -24,11 +35,11 @@ import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils;
import us.codecraft.webmagic.utils.HttpClientUtils;
import us.codecraft.webmagic.utils.HttpConstant;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
/**
......@@ -88,7 +99,7 @@ public class HttpClientDownloader extends AbstractDownloader {
int statusCode = 0;
Site site = task.getSite();
Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext();
HttpClientContext httpContext = new HttpClientContext();
if (proxyProvider != null) {
proxy = proxyProvider.getProxy(task);
AuthState authState = new AuthState();
......@@ -97,6 +108,18 @@ public class HttpClientDownloader extends AbstractDownloader {
}
CloseableHttpClient httpClient = getHttpClient(site);
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy);
if (request.getCookies() != null && CollectionUtils.isNotEmpty(request.getCookies())) {
CookieStore cookieStore = new BasicCookieStore();
for (Cookie c : request.getCookies()) {
cookieStore.addCookie(c);
}
httpContext.setCookieStore(cookieStore);
}
if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) {
for (Header h : request.getHeaders()) {
httpUriRequest.setHeader(h);
}
}
try {
httpResponse = httpClient.execute(httpUriRequest, httpContext);
statusCode = httpResponse.getStatusLine().getStatusCode();
......
package us.codecraft.webmagic.downloader;
import org.apache.http.HttpHost;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.entity.ByteArrayEntity;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.utils.HttpConstant;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
......@@ -53,32 +47,27 @@ public class HttpUriRequestConverter {
String method = request.getMethod();
if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) {
//default get
return addQueryParams(RequestBuilder.get(),request.getParams());
return RequestBuilder.get();
} else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) {
return addFormParams(RequestBuilder.post(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams());
return addFormParams(RequestBuilder.post(),request);
} else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
return addQueryParams(RequestBuilder.head(),request.getParams());
return RequestBuilder.head();
} else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
return addFormParams(RequestBuilder.put(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams());
return addFormParams(RequestBuilder.put(), request);
} else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
return addQueryParams(RequestBuilder.delete(),request.getParams());
return RequestBuilder.delete();
} else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
return addQueryParams(RequestBuilder.trace(),request.getParams());
return RequestBuilder.trace();
}
throw new IllegalArgumentException("Illegal HTTP Method " + method);
}
private RequestBuilder addFormParams(RequestBuilder requestBuilder, NameValuePair[] nameValuePair, Map<String, String> params) {
List<NameValuePair> allNameValuePair=new ArrayList<NameValuePair>();
if (nameValuePair != null && nameValuePair.length > 0) {
allNameValuePair= Arrays.asList(nameValuePair);
private RequestBuilder addFormParams(RequestBuilder requestBuilder, Request request) {
if (request.getRequestBody() != null) {
ByteArrayEntity entity = new ByteArrayEntity(request.getRequestBody().getBody());
entity.setContentType(request.getRequestBody().getContentType());
requestBuilder.setEntity(entity);
}
if (params != null) {
for (String key : params.keySet()) {
allNameValuePair.add(new BasicNameValuePair(key, params.get(key)));
}
}
requestBuilder.setEntity(new UrlEncodedFormEntity(allNameValuePair, Charset.forName("utf8")));
return requestBuilder;
}
......
package us.codecraft.webmagic.model;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* @author code4crafter@gmail.com
* Date: 17/4/8
*/
/**
 * Holds the raw bytes of an HTTP request body together with its content type
 * and the name of the character encoding associated with it.
 * <p>
 * The class is {@link java.io.Serializable} because {@code Request}, which is
 * itself Serializable, keeps a reference to an instance of this class; without
 * it, serializing a Request that carries a body fails with
 * {@link java.io.NotSerializableException}.
 * <p>
 * All fields are final, but the byte array is stored and returned by
 * reference, so callers should not modify it after handing it over.
 */
public class HttpRequestBody implements java.io.Serializable {

    private static final long serialVersionUID = 5659170945717023595L;

    /**
     * Content-type values used by the factory methods of this class.
     */
    public static abstract class ContentType {

        public static final String JSON = "application/json";

        public static final String XML = "text/xml";

        public static final String FORM = "application/x-www-form-urlencoded";

        public static final String MULTIPART = "multipart/form-data";
    }

    /** Raw body bytes, stored by reference. */
    private final byte[] body;

    /** Content type of the body, e.g. one of the {@link ContentType} constants. */
    private final String contentType;

    /** Name of the character encoding associated with the body. */
    private final String encoding;

    /**
     * Creates a body from already-encoded bytes.
     *
     * @param body        raw body bytes (not copied)
     * @param contentType content type of the body
     * @param encoding    name of the character encoding associated with the body
     */
    public HttpRequestBody(byte[] body, String contentType, String encoding) {
        this.body = body;
        this.contentType = contentType;
        this.encoding = encoding;
    }

    /**
     * @return the content type given at construction
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * @return the encoding name given at construction
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Creates a body of type {@link ContentType#JSON} from a JSON string.
     *
     * @param json     JSON text
     * @param encoding charset name used to turn the text into bytes
     * @return the new body
     * @throws UnsupportedEncodingException if the charset name is not supported
     */
    public static HttpRequestBody json(String json, String encoding) throws UnsupportedEncodingException {
        return new HttpRequestBody(json.getBytes(encoding), ContentType.JSON, encoding);
    }

    /**
     * Creates a body of type {@link ContentType#XML} from an XML string.
     *
     * @param xml      XML text
     * @param encoding charset name used to turn the text into bytes
     * @return the new body
     * @throws UnsupportedEncodingException if the charset name is not supported
     */
    public static HttpRequestBody xml(String xml, String encoding) throws UnsupportedEncodingException {
        return new HttpRequestBody(xml.getBytes(encoding), ContentType.XML, encoding);
    }

    /**
     * Creates a body from caller-supplied bytes and content type.
     * <p>
     * The checked exception is never thrown by this method; it stays in the
     * signature so existing callers that catch it keep compiling.
     *
     * @param body        raw body bytes (not copied)
     * @param contentType content type of the body
     * @param encoding    name of the character encoding associated with the body
     * @return the new body
     * @throws UnsupportedEncodingException declared for compatibility only
     */
    public static HttpRequestBody custom(byte[] body, String contentType, String encoding) throws UnsupportedEncodingException {
        return new HttpRequestBody(body, contentType, encoding);
    }

    /**
     * Creates a body of type {@link ContentType#FORM} from a map of form fields.
     * <p>
     * Each value is converted with {@link String#valueOf(Object)}, so a null
     * value is sent as the text {@code "null"}.
     *
     * @param params   form fields; must not be null
     * @param encoding charset name used both for URL-encoding and for turning
     *                 the encoded text into bytes
     * @return the new body
     * @throws UnsupportedEncodingException if the charset name is not supported
     */
    public static HttpRequestBody form(Map<String,Object> params, String encoding) throws UnsupportedEncodingException {
        List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(params.size());
        for (Map.Entry<String, Object> entry : params.entrySet()) {
            nameValuePairs.add(new BasicNameValuePair(entry.getKey(), String.valueOf(entry.getValue())));
        }
        return new HttpRequestBody(URLEncodedUtils.format(nameValuePairs, encoding).getBytes(encoding), ContentType.FORM, encoding);
    }

    /**
     * @return the raw body bytes; the internal array is returned without copying
     */
    public byte[] getBody() {
        return body;
    }
}
......@@ -26,7 +26,7 @@ public abstract class CharsetUtils {
// charset
// 1、encoding in http header Content-Type
charset = UrlUtils.getCharset(contentType);
if (StringUtils.isNotBlank(contentType)) {
if (StringUtils.isNotBlank(contentType) && StringUtils.isNotBlank(charset)) {
logger.debug("Auto get charset: {}", charset);
return charset;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment