Commit fe95a684 authored by yihua.huang

Request再次重构:去掉params,仅保留HttpRequestBody

parents 74110e6e 395396c6
package us.codecraft.webmagic; package us.codecraft.webmagic;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import us.codecraft.webmagic.selector.Html; import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Json; import us.codecraft.webmagic.selector.Json;
import us.codecraft.webmagic.selector.Selectable; import us.codecraft.webmagic.selector.Selectable;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
...@@ -46,7 +49,7 @@ public class Page { ...@@ -46,7 +49,7 @@ public class Page {
private boolean needCycleRetry; private boolean needCycleRetry;
private List<Request> targetRequests = new ArrayList<Request>(); private List<Request> targetRequests = new ArrayList<Request>();
public Page() { public Page() {
} }
...@@ -232,6 +235,11 @@ public class Page { ...@@ -232,6 +235,11 @@ public class Page {
", statusCode=" + statusCode + ", statusCode=" + statusCode +
", needCycleRetry=" + needCycleRetry + ", needCycleRetry=" + needCycleRetry +
", targetRequests=" + targetRequests + ", targetRequests=" + targetRequests +
", headers=" + headers+
'}'; '}';
} }
} }
package us.codecraft.webmagic; package us.codecraft.webmagic;
import org.apache.http.Header;
import org.apache.http.cookie.Cookie;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.utils.Experimental; import us.codecraft.webmagic.utils.Experimental;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
...@@ -23,14 +28,19 @@ public class Request implements Serializable { ...@@ -23,14 +28,19 @@ public class Request implements Serializable {
private String method; private String method;
private HttpRequestBody requestBody;
/** /**
* Store additional information in extras. * Store additional information in extras.
*/ */
private Map<String, Object> extras; private Map<String, Object> extras;
/** /**
* POST/GET param set * cookies for current url, if not set use Site's cookies
* */ */
private Map<String,String> params=new HashMap<String, String>(); private List<Cookie> cookies=new ArrayList<Cookie>();
private List<Header> headers=new ArrayList<Header>();
/** /**
* Priority of the request.<br> * Priority of the request.<br>
...@@ -109,57 +119,38 @@ public class Request implements Serializable { ...@@ -109,57 +119,38 @@ public class Request implements Serializable {
this.method = method; this.method = method;
} }
public Map<String, String> getParams() {
return params;
}
/**
* set params for request
* <br>
* DO NOT set this for request already has params, like 'https://github.com/search?q=webmagic'
* @param params params
* */
public void setParams(Map<String, String> params) {
this.params = params;
}
/**
* set params for request
* <br>
* DO NOT set this for request already has params, like 'https://github.com/search?q=webmagic'
* @param key key
* @param value value
* */
public void putParams(String key,String value) {
params.put(key,value);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Request request = (Request) o;
if (url != null ? !url.equals(request.url) : request.url != null) return false;
if (method != null ? !method.equals(request.method) : request.method != null) return false;
return params != null ? params.equals(request.params) : request.params == null;
}
@Override @Override
public int hashCode() { public int hashCode() {
int result = url != null ? url.hashCode() : 0; int result = url != null ? url.hashCode() : 0;
result = 31 * result + (method != null ? method.hashCode() : 0); result = 31 * result + (method != null ? method.hashCode() : 0);
result = 31 * result + (params != null ? params.hashCode() : 0); result = 31 * result + (headers != null ? headers.hashCode() : 0);
result = 31 * result + (cookies != null ? cookies.hashCode() : 0);
return result; return result;
} }
/**
 * Returns the cookies held by this request.
 * <p>
 * The list stored in the {@code cookies} field is returned directly, not a copy,
 * so changes made through the returned reference are visible to this request.
 *
 * @return the list stored in the {@code cookies} field
 */
public List<Cookie> getCookies() {
return cookies;
}
/**
 * Returns the HTTP headers held by this request.
 * <p>
 * The list stored in the {@code headers} field is returned directly, not a copy,
 * so changes made through the returned reference are visible to this request.
 *
 * @return the list stored in the {@code headers} field
 */
public List<Header> getHeaders() {
return headers;
}
/**
 * Returns the body attached to this request.
 *
 * @return the value of the {@code requestBody} field; presumably {@code null}
 *         when no body has been set (the field has no initializer) - confirm
 *         against the setter/constructors outside this view
 */
public HttpRequestBody getRequestBody() {
return requestBody;
}
@Override @Override
public String toString() { public String toString() {
return "Request{" + return "Request{" +
"url='" + url + '\'' + "url='" + url + '\'' +
", method='" + method + '\'' + ", method='" + method + '\'' +
", extras=" + extras + ", extras=" + extras +
", params=" + params +
", priority=" + priority + ", priority=" + priority +
", headers=" + headers +
", cookies="+ cookies+
'}'; '}';
} }
} }
package us.codecraft.webmagic.downloader; package us.codecraft.webmagic.downloader;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse; import org.apache.http.HttpResponse;
import org.apache.http.annotation.ThreadSafe; import org.apache.http.annotation.ThreadSafe;
import org.apache.http.auth.AuthState; import org.apache.http.auth.AuthState;
import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.auth.BasicScheme; import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext; import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
...@@ -24,11 +35,11 @@ import us.codecraft.webmagic.proxy.ProxyProvider; ...@@ -24,11 +35,11 @@ import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.CharsetUtils;
import us.codecraft.webmagic.utils.HttpClientUtils; import us.codecraft.webmagic.utils.HttpClientUtils;
import us.codecraft.webmagic.utils.HttpConstant;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.HashMap; import java.util.*;
import java.util.Map;
/** /**
...@@ -88,7 +99,7 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -88,7 +99,7 @@ public class HttpClientDownloader extends AbstractDownloader {
int statusCode = 0; int statusCode = 0;
Site site = task.getSite(); Site site = task.getSite();
Proxy proxy = null; Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext(); HttpClientContext httpContext = new HttpClientContext();
if (proxyProvider != null) { if (proxyProvider != null) {
proxy = proxyProvider.getProxy(task); proxy = proxyProvider.getProxy(task);
AuthState authState = new AuthState(); AuthState authState = new AuthState();
...@@ -97,6 +108,18 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -97,6 +108,18 @@ public class HttpClientDownloader extends AbstractDownloader {
} }
CloseableHttpClient httpClient = getHttpClient(site); CloseableHttpClient httpClient = getHttpClient(site);
HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy); HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request, site, proxy);
if (request.getCookies() != null && CollectionUtils.isNotEmpty(request.getCookies())) {
CookieStore cookieStore = new BasicCookieStore();
for (Cookie c : request.getCookies()) {
cookieStore.addCookie(c);
}
httpContext.setCookieStore(cookieStore);
}
if (request.getHeaders() != null && CollectionUtils.isNotEmpty(request.getHeaders())) {
for (Header h : request.getHeaders()) {
httpUriRequest.setHeader(h);
}
}
try { try {
httpResponse = httpClient.execute(httpUriRequest, httpContext); httpResponse = httpClient.execute(httpUriRequest, httpContext);
statusCode = httpResponse.getStatusLine().getStatusCode(); statusCode = httpResponse.getStatusLine().getStatusCode();
......
package us.codecraft.webmagic.downloader; package us.codecraft.webmagic.downloader;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig; import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder; import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.message.BasicNameValuePair; import org.apache.http.entity.ByteArrayEntity;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.utils.HttpConstant; import us.codecraft.webmagic.utils.HttpConstant;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
...@@ -53,32 +47,27 @@ public class HttpUriRequestConverter { ...@@ -53,32 +47,27 @@ public class HttpUriRequestConverter {
String method = request.getMethod(); String method = request.getMethod();
if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) { if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) {
//default get //default get
return addQueryParams(RequestBuilder.get(),request.getParams()); return RequestBuilder.get();
} else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) { } else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) {
return addFormParams(RequestBuilder.post(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams()); return addFormParams(RequestBuilder.post(),request);
} else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) { } else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
return addQueryParams(RequestBuilder.head(),request.getParams()); return RequestBuilder.head();
} else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) { } else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
return addFormParams(RequestBuilder.put(), (NameValuePair[]) request.getExtra("nameValuePair"), request.getParams()); return addFormParams(RequestBuilder.put(), request);
} else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) { } else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
return addQueryParams(RequestBuilder.delete(),request.getParams()); return RequestBuilder.delete();
} else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) { } else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
return addQueryParams(RequestBuilder.trace(),request.getParams()); return RequestBuilder.trace();
} }
throw new IllegalArgumentException("Illegal HTTP Method " + method); throw new IllegalArgumentException("Illegal HTTP Method " + method);
} }
private RequestBuilder addFormParams(RequestBuilder requestBuilder, NameValuePair[] nameValuePair, Map<String, String> params) { private RequestBuilder addFormParams(RequestBuilder requestBuilder, Request request) {
List<NameValuePair> allNameValuePair=new ArrayList<NameValuePair>(); if (request.getRequestBody() != null) {
if (nameValuePair != null && nameValuePair.length > 0) { ByteArrayEntity entity = new ByteArrayEntity(request.getRequestBody().getBody());
allNameValuePair= Arrays.asList(nameValuePair); entity.setContentType(request.getRequestBody().getContentType());
requestBuilder.setEntity(entity);
} }
if (params != null) {
for (String key : params.keySet()) {
allNameValuePair.add(new BasicNameValuePair(key, params.get(key)));
}
}
requestBuilder.setEntity(new UrlEncodedFormEntity(allNameValuePair, Charset.forName("utf8")));
return requestBuilder; return requestBuilder;
} }
......
package us.codecraft.webmagic.model;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Raw body of an HTTP request together with its content type and the name of the
 * character encoding that was used to produce the bytes.
 * <p>
 * Instances are created either through the constructor or through one of the static
 * factories ({@link #json}, {@link #xml}, {@link #form}, {@link #custom}).
 * <p>
 * The class is {@link java.io.Serializable} because it is stored as a field of
 * {@code Request}, which is itself serializable; without this a request carrying a
 * body could not be serialized.
 * <p>
 * Note that the byte array is neither copied on construction nor on access, so the
 * caller and this object share the same array.
 *
 * @author code4crafter@gmail.com
 *         Date: 17/4/8
 */
public class HttpRequestBody implements java.io.Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * Content type values used by the factory methods of the enclosing class.
     */
    public static abstract class ContentType {

        public static final String JSON = "application/json";

        public static final String XML = "text/xml";

        public static final String FORM = "application/x-www-form-urlencoded";

        public static final String MULTIPART = "multipart/form-data";
    }

    /** Body bytes exactly as supplied by the caller (shared, not copied). */
    private final byte[] body;

    /** Content type string, e.g. one of the {@link ContentType} constants. */
    private final String contentType;

    /** Name of the charset the body bytes were encoded with. */
    private final String encoding;

    /**
     * Creates a body from already encoded bytes.
     *
     * @param body        the encoded body bytes; stored by reference
     * @param contentType the content type of the body
     * @param encoding    the charset name that was used to encode {@code body}
     */
    public HttpRequestBody(byte[] body, String contentType, String encoding) {
        this.body = body;
        this.contentType = contentType;
        this.encoding = encoding;
    }

    /**
     * @return the content type given at construction
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * @return the charset name given at construction
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Creates a body with content type {@link ContentType#JSON}.
     *
     * @param json     the JSON text
     * @param encoding the charset name used to turn {@code json} into bytes
     * @return the new body
     * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset name
     */
    public static HttpRequestBody json(String json, String encoding) throws UnsupportedEncodingException {
        return new HttpRequestBody(json.getBytes(encoding), ContentType.JSON, encoding);
    }

    /**
     * Creates a body with content type {@link ContentType#XML}.
     *
     * @param xml      the XML text
     * @param encoding the charset name used to turn {@code xml} into bytes
     * @return the new body
     * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset name
     */
    public static HttpRequestBody xml(String xml, String encoding) throws UnsupportedEncodingException {
        return new HttpRequestBody(xml.getBytes(encoding), ContentType.XML, encoding);
    }

    /**
     * Creates a body from caller-supplied bytes and an arbitrary content type.
     * <p>
     * No encoding is performed here; the {@code throws} clause is kept so that existing
     * callers which catch the exception continue to compile.
     *
     * @param body        the encoded body bytes; stored by reference
     * @param contentType the content type of the body
     * @param encoding    the charset name that was used to encode {@code body}
     * @return the new body
     * @throws UnsupportedEncodingException never thrown by this implementation
     */
    public static HttpRequestBody custom(byte[] body, String contentType, String encoding) throws UnsupportedEncodingException {
        return new HttpRequestBody(body, contentType, encoding);
    }

    /**
     * Creates a URL-encoded form body with content type {@link ContentType#FORM}.
     * <p>
     * Each value is converted with {@link String#valueOf(Object)}, so a {@code null}
     * value is sent as the text {@code "null"}.
     *
     * @param params   the form fields; iterated in the map's own order
     * @param encoding the charset name used both for URL-encoding and for the final bytes
     * @return the new body
     * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset name
     */
    public static HttpRequestBody form(Map<String,Object> params, String encoding) throws UnsupportedEncodingException {
        List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(params.size());
        for (Map.Entry<String, Object> entry : params.entrySet()) {
            nameValuePairs.add(new BasicNameValuePair(entry.getKey(), String.valueOf(entry.getValue())));
        }
        return new HttpRequestBody(URLEncodedUtils.format(nameValuePairs, encoding).getBytes(encoding), ContentType.FORM, encoding);
    }

    /**
     * @return the body bytes; this is the internal array, not a copy
     */
    public byte[] getBody() {
        return body;
    }
}
...@@ -26,7 +26,7 @@ public abstract class CharsetUtils { ...@@ -26,7 +26,7 @@ public abstract class CharsetUtils {
// charset // charset
// 1、encoding in http header Content-Type // 1、encoding in http header Content-Type
charset = UrlUtils.getCharset(contentType); charset = UrlUtils.getCharset(contentType);
if (StringUtils.isNotBlank(contentType)) { if (StringUtils.isNotBlank(contentType) && StringUtils.isNotBlank(charset)) {
logger.debug("Auto get charset: {}", charset); logger.debug("Auto get charset: {}", charset);
return charset; return charset;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment