Commit 68050fc8 authored by yihua.huang

test pass

parent 474b7c9d
...@@ -2,8 +2,7 @@ package us.codecraft.webmagic; ...@@ -2,8 +2,7 @@ package us.codecraft.webmagic;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.auth.UsernamePasswordCredentials;
import us.codecraft.webmagic.proxy.ProxyPool; import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.proxy.TimerReuseProxyPool;
import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.UrlUtils;
import java.util.*; import java.util.*;
...@@ -52,7 +51,7 @@ public class Site { ...@@ -52,7 +51,7 @@ public class Site {
private UsernamePasswordCredentials usernamePasswordCredentials; //代理用户名密码设置 private UsernamePasswordCredentials usernamePasswordCredentials; //代理用户名密码设置
private ProxyPool httpProxyPool; private ProxyProvider httpProxyPool;
private boolean useGzip = true; private boolean useGzip = true;
...@@ -399,7 +398,11 @@ public class Site { ...@@ -399,7 +398,11 @@ public class Site {
return new Task() { return new Task() {
@Override @Override
public String getUUID() { public String getUUID() {
return Site.this.getDomain(); String uuid = Site.this.getDomain();
if (uuid == null) {
uuid = UUID.randomUUID().toString();
}
return uuid;
} }
@Override @Override
...@@ -467,45 +470,4 @@ public class Site { ...@@ -467,45 +470,4 @@ public class Site {
'}'; '}';
} }
/**
* Stores the given proxy pool on this site.
*
* @param proxyPool the pool assigned to the {@code httpProxyPool} field
* @return this site, so calls can be chained
*/
public Site setHttpProxyPool(ProxyPool proxyPool) {
this.httpProxyPool = proxyPool;
return this;
}
/**
* Creates a {@code TimerReuseProxyPool} from the given arguments and stores it
* as this site's proxy pool.
*
* @param httpProxyList proxy entries handed to the {@code TimerReuseProxyPool} constructor;
*                      per the original note each entry is String[0]:ip, String[1]:port
*                      (layout is interpreted by TimerReuseProxyPool - TODO confirm there)
* @param isUseLastProxy forwarded unchanged to the {@code TimerReuseProxyPool} constructor
* @return this site, so calls can be chained
*/
public Site setHttpProxyPool(List<String[]> httpProxyList, boolean isUseLastProxy) {
this.httpProxyPool=new TimerReuseProxyPool(httpProxyList, isUseLastProxy);
return this;
}
/**
* Installs a {@code TimerReuseProxyPool} built with its no-argument constructor
* as this site's proxy pool, replacing whatever pool was set before.
*
* @return this site, so calls can be chained
*/
public Site enableHttpProxyPool() {
this.httpProxyPool=new TimerReuseProxyPool();
return this;
}
/**
* Returns the credentials stored on this site (the field is annotated as the
* proxy username/password setting).
*
* @return the current value of the {@code usernamePasswordCredentials} field
*/
public UsernamePasswordCredentials getUsernamePasswordCredentials() {
return usernamePasswordCredentials;
}
/**
* Stores the given credentials on this site (the field is annotated as the
* proxy username/password setting).
*
* @param usernamePasswordCredentials the credentials to store
* @return this site, so calls can be chained
*/
public Site setUsernamePasswordCredentials(UsernamePasswordCredentials usernamePasswordCredentials) {
this.usernamePasswordCredentials = usernamePasswordCredentials;
return this;
}
/**
* Returns the proxy pool stored on this site.
*
* @return the current value of the {@code httpProxyPool} field
*/
public ProxyPool getHttpProxyPool() {
return httpProxyPool;
}
} }
...@@ -20,6 +20,7 @@ import us.codecraft.webmagic.Request; ...@@ -20,6 +20,7 @@ import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.CharsetUtils;
...@@ -46,10 +47,16 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -46,10 +47,16 @@ public class HttpClientDownloader extends AbstractDownloader {
private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter(); private HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
private ProxyProvider proxyProvider;
public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) { public void setHttpUriRequestConverter(HttpUriRequestConverter httpUriRequestConverter) {
this.httpUriRequestConverter = httpUriRequestConverter; this.httpUriRequestConverter = httpUriRequestConverter;
} }
public void setProxyProvider(ProxyProvider proxyProvider) {
this.proxyProvider = proxyProvider;
}
private CloseableHttpClient getHttpClient(Site site) { private CloseableHttpClient getHttpClient(Site site) {
if (site == null) { if (site == null) {
return httpClientGenerator.getClient(null); return httpClientGenerator.getClient(null);
...@@ -79,8 +86,8 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -79,8 +86,8 @@ public class HttpClientDownloader extends AbstractDownloader {
Site site = task.getSite(); Site site = task.getSite();
Proxy proxy = null; Proxy proxy = null;
HttpContext httpContext = new BasicHttpContext(); HttpContext httpContext = new BasicHttpContext();
if (site.getHttpProxyPool() != null) { if (proxyProvider != null) {
proxy = site.getHttpProxyPool().getProxy(task); proxy = proxyProvider.getProxy(task);
request.putExtra(Request.PROXY, proxy); request.putExtra(Request.PROXY, proxy);
AuthState authState = new AuthState(); AuthState authState = new AuthState();
authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword())); authState.update(new BasicScheme(), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
...@@ -111,9 +118,6 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -111,9 +118,6 @@ public class HttpClientDownloader extends AbstractDownloader {
//ensure the connection is released back to pool //ensure the connection is released back to pool
EntityUtils.consumeQuietly(httpResponse.getEntity()); EntityUtils.consumeQuietly(httpResponse.getEntity());
} }
if (proxy != null) {
site.getHttpProxyPool().returnProxy(proxy, statusCode, task);
}
} }
} }
......
...@@ -43,7 +43,7 @@ public class HttpUriRequestConverter { ...@@ -43,7 +43,7 @@ public class HttpUriRequestConverter {
} }
if (proxy != null) { if (proxy != null) {
requestConfigBuilder.setProxy(new HttpHost(proxy.getProxyHost().getHost(), proxy.getProxyHost().getPort())); requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort()));
} }
requestBuilder.setConfig(requestConfigBuilder.build()); requestBuilder.setConfig(requestConfigBuilder.build());
return requestBuilder.build(); return requestBuilder.build();
......
...@@ -6,42 +6,36 @@ package us.codecraft.webmagic.proxy; ...@@ -6,42 +6,36 @@ package us.codecraft.webmagic.proxy;
public class Proxy { public class Proxy {
private ProxyHost proxyHost; private String host;
private int port;
private String username; private String username;
private String password; private String password;
public Proxy(ProxyHost proxyHost, String username, String password) { public Proxy(String host, int port) {
this.proxyHost = proxyHost; this.host = host;
this.username = username; this.port = port;
this.password = password;
} }
public Proxy(ProxyHost proxyHost) { public Proxy(String host, int port, String username, String password) {
this.proxyHost = proxyHost; this.host = host;
this.port = port;
this.username = username;
this.password = password;
} }
public ProxyHost getProxyHost() { public String getHost() {
return proxyHost; return host;
} }
public void setProxyHost(ProxyHost proxyHost) { public int getPort() {
this.proxyHost = proxyHost; return port;
} }
public String getUsername() { public String getUsername() {
return username; return username;
} }
public void setUsername(String username) {
this.username = username;
}
public String getPassword() { public String getPassword() {
return password; return password;
} }
public void setPassword(String password) {
this.password = password;
}
} }
package us.codecraft.webmagic.proxy;
/**
 * Mutable host/port pair describing a proxy endpoint.
 *
 * @author code4crafter@gmail.com
 * Date: 17/3/18
 * Time: 12:04 PM
 */
public class ProxyHost {

    private String host;

    private int port;

    /**
     * Creates a host/port pair.
     *
     * @param host the proxy host name or address
     * @param port the proxy port
     */
    public ProxyHost(String host, int port) {
        this.host = host;
        this.port = port;
    }

    /**
     * @return the proxy host name or address
     */
    public String getHost() {
        return host;
    }

    /**
     * @param host the proxy host name or address
     */
    public void setHost(String host) {
        this.host = host;
    }

    /**
     * @return the proxy port
     */
    public int getPort() {
        return port;
    }

    /**
     * @param port the proxy port
     */
    public void setPort(int port) {
        this.port = port;
    }

    /**
     * Renders this endpoint as {@code host:port}, so that string concatenation
     * in log messages shows the endpoint instead of an identity hash.
     *
     * @return {@code host + ":" + port}
     */
    @Override
    public String toString() {
        return host + ":" + port;
    }
}
...@@ -5,7 +5,7 @@ import us.codecraft.webmagic.Task; ...@@ -5,7 +5,7 @@ import us.codecraft.webmagic.Task;
/** /**
* Created by edwardsbean on 15-2-28. * Created by edwardsbean on 15-2-28.
*/ */
public interface ProxyPool { public interface ProxyProvider {
void returnProxy(Proxy proxy, boolean banned, Task task); void returnProxy(Proxy proxy, boolean banned, Task task);
......
...@@ -72,14 +72,10 @@ public class TimerReuseProxy extends Proxy implements Delayed, Serializable { ...@@ -72,14 +72,10 @@ public class TimerReuseProxy extends Proxy implements Delayed, Serializable {
private List<Integer> failedErrorType = new ArrayList<Integer>(); private List<Integer> failedErrorType = new ArrayList<Integer>();
public TimerReuseProxy(ProxyHost proxyHost, String user, String password) { public TimerReuseProxy(String host, int port, String username, String password) {
super(proxyHost, user, password); super(host, port, username, password);
} }
public TimerReuseProxy(ProxyHost proxyHost, String user, String password, int reuseTimeInterval) {
super(proxyHost, user, password);
this.reuseTimeInterval = reuseTimeInterval;
}
public int getSuccessNum() { public int getSuccessNum() {
return successNum; return successNum;
......
package us.codecraft.webmagic.utils; package us.codecraft.webmagic.utils;
import org.apache.http.HttpHost;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.proxy.ProxyHost; import us.codecraft.webmagic.proxy.Proxy;
import java.io.IOException; import java.io.IOException;
import java.net.*; import java.net.InetSocketAddress;
import java.util.Enumeration; import java.net.Socket;
import java.util.regex.Pattern;
/** /**
* Pooled Proxy Object * Pooled Proxy Object
...@@ -18,72 +16,19 @@ import java.util.regex.Pattern; ...@@ -18,72 +16,19 @@ import java.util.regex.Pattern;
*/ */
public class ProxyUtils { public class ProxyUtils {
private static InetAddress localAddr;
private static String networkInterface = "eth7";
private static final Logger logger = LoggerFactory.getLogger(ProxyUtils.class); private static final Logger logger = LoggerFactory.getLogger(ProxyUtils.class);
// Resolve the local address once, when the class is loaded.
static {
    init();
}

/**
 * Populates {@code localAddr}.
 * <p>
 * The JDK's local-host lookup is tried first. If {@code localAddr} is still
 * {@code null} afterwards, the interface named by {@code networkInterface} is
 * scanned and the first address that is not loopback, not link-local and not
 * IPv6 is used. When nothing suitable is found {@code localAddr} is left as is.
 */
private static void init() {
    // Primary strategy: ask the JDK for the local host address.
    try {
        localAddr = InetAddress.getLocalHost();
        logger.info("local IP:" + localAddr.getHostAddress());
    } catch (UnknownHostException e) {
        logger.info("try again\n");
    }
    if (localAddr != null) {
        return;
    }

    // Fallback strategy: inspect the configured network interface
    // (adjust the networkInterface name to match the machine).
    try {
        NetworkInterface nic = NetworkInterface.getByName(networkInterface);
        if (nic == null) {
            return;
        }
        Enumeration<InetAddress> candidates = nic.getInetAddresses();
        if (candidates == null || !candidates.hasMoreElements()) {
            logger.error("choose NetworkInterface\n" + getNetworkInterface());
            return;
        }
        while (candidates.hasMoreElements()) {
            InetAddress candidate = candidates.nextElement();
            if (candidate.isLoopbackAddress()
                    || candidate.isLinkLocalAddress()
                    || candidate instanceof Inet6Address) {
                // Not usable as the local bind address; keep looking.
                continue;
            }
            localAddr = candidate;
            logger.info("local IP:" + localAddr.getHostAddress());
            break;
        }
    } catch (Exception e) {
        logger.error("Failure when init ProxyUtil", e);
        logger.error("choose NetworkInterface\n" + getNetworkInterface());
    }
}
public static HttpHost convert(ProxyHost p){ public static boolean validateProxy(Proxy p) {
return new HttpHost(p.getHost(),p.getPort());
}
public static boolean validateProxy(ProxyHost p) {
if (localAddr == null) {
logger.error("cannot get local IP");
return false;
}
boolean isReachable = false;
Socket socket = null; Socket socket = null;
try { try {
socket = new Socket(); socket = new Socket();
socket.bind(new InetSocketAddress(localAddr, 0));
InetSocketAddress endpointSocketAddr = new InetSocketAddress(p.getHost(), p.getPort()); InetSocketAddress endpointSocketAddr = new InetSocketAddress(p.getHost(), p.getPort());
socket.connect(endpointSocketAddr, 3000); socket.connect(endpointSocketAddr, 3000);
logger.debug("SUCCESS - connection established! Local: " + localAddr.getHostAddress() + " remote: " + p); return true;
isReachable = true;
} catch (IOException e) { } catch (IOException e) {
logger.warn("FAILRE - CAN not connect! Local: " + localAddr.getHostAddress() + " remote: " + p); logger.warn("FAILRE - CAN not connect! remote: " + p);
return false;
} finally { } finally {
if (socket != null) { if (socket != null) {
try { try {
...@@ -93,30 +38,7 @@ public class ProxyUtils { ...@@ -93,30 +38,7 @@ public class ProxyUtils {
} }
} }
} }
return isReachable;
}
private static String getNetworkInterface() {
String networkInterfaceName = ">>>> modify networkInterface in us.codecraft.webmagic.utils.ProxyUtils";
Enumeration<NetworkInterface> enumeration = null;
try {
enumeration = NetworkInterface.getNetworkInterfaces();
} catch (SocketException e1) {
e1.printStackTrace();
} }
while (enumeration.hasMoreElements()) {
NetworkInterface networkInterface = enumeration.nextElement();
Enumeration<InetAddress> addr = networkInterface.getInetAddresses();
while (addr.hasMoreElements()) {
String s = addr.nextElement().getHostAddress();
Pattern IPV4_PATTERN = Pattern.compile("^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
if (s != null && IPV4_PATTERN.matcher(s).matches()) {
networkInterfaceName += networkInterface.toString() + "IP:" + s + "\n\n";
}
}
}
return networkInterfaceName;
}
} }
...@@ -5,7 +5,7 @@ import com.github.dreamhead.moco.Runnable; ...@@ -5,7 +5,7 @@ import com.github.dreamhead.moco.Runnable;
import com.github.dreamhead.moco.Runner; import com.github.dreamhead.moco.Runner;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.RequestBuilder; import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils; import org.apache.http.util.EntityUtils;
...@@ -87,12 +87,12 @@ public class HttpClientDownloaderTest { ...@@ -87,12 +87,12 @@ public class HttpClientDownloaderTest {
private String getCharsetByUrl(String url) { private String getCharsetByUrl(String url) {
HttpClientDownloader downloader = new HttpClientDownloader(); HttpClientDownloader downloader = new HttpClientDownloader();
Site site = Site.me(); Site site = Site.me();
CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null); CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site);
// encoding in http header Content-Type // encoding in http header Content-Type
Request requestGBK = new Request(url); Request requestGBK = new Request(url);
CloseableHttpResponse httpResponse = null; CloseableHttpResponse httpResponse = null;
try { try {
httpResponse = httpClient.execute(downloader.getHttpUriRequest(requestGBK, site, null,null)); httpResponse = httpClient.execute(new HttpUriRequestConverter().convert(requestGBK, site, null));
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
...@@ -117,31 +117,32 @@ public class HttpClientDownloaderTest { ...@@ -117,31 +117,32 @@ public class HttpClientDownloaderTest {
server.delete(eq(query("q"), "webmagic")).response("delete"); server.delete(eq(query("q"), "webmagic")).response("delete");
server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head")); server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head"));
server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace"); server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace");
final HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter();
final Site site = Site.me();
Runner.running(server, new Runnable() { Runner.running(server, new Runnable() {
@Override @Override
public void run() throws Exception { public void run() throws Exception {
HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request(); Request request = new Request();
request.setUrl("http://127.0.0.1:12306/search"); request.setUrl("http://127.0.0.1:12306/search");
request.putParams("q", "webmagic"); request.putParams("q", "webmagic");
request.setMethod(HttpConstant.Method.GET); request.setMethod(HttpConstant.Method.GET);
RequestBuilder requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request,site,null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("get"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("get");
request.setMethod(HttpConstant.Method.POST); request.setMethod(HttpConstant.Method.POST);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("post"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("post");
request.setMethod(HttpConstant.Method.PUT); request.setMethod(HttpConstant.Method.PUT);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("put"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("put");
request.setMethod(HttpConstant.Method.DELETE); request.setMethod(HttpConstant.Method.DELETE);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("delete"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("delete");
request.setMethod(HttpConstant.Method.HEAD); request.setMethod(HttpConstant.Method.HEAD);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(HttpClients.custom().build().execute(requestBuilder.build()).getFirstHeader("method").getValue()).isEqualTo("head"); assertThat(HttpClients.custom().build().execute(httpUriRequest).getFirstHeader("method").getValue()).isEqualTo("head");
request.setMethod(HttpConstant.Method.TRACE); request.setMethod(HttpConstant.Method.TRACE);
requestBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl()); httpUriRequest = httpUriRequestConverter.convert(request, site, null);
assertThat(EntityUtils.toString(HttpClients.custom().build().execute(requestBuilder.build()).getEntity())).isEqualTo("trace"); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("trace");
} }
}); });
} }
...@@ -156,7 +157,7 @@ public class HttpClientDownloaderTest { ...@@ -156,7 +157,7 @@ public class HttpClientDownloaderTest {
final HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); final HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
Request request = new Request(); Request request = new Request();
request.setUrl("http://127.0.0.1:12306/"); request.setUrl("http://127.0.0.1:12306/");
Page page = httpClientDownloader.download(request, null); Page page = httpClientDownloader.download(request, Site.me().toTask());
assertThat(page.getRawText()).isEqualTo("foo"); assertThat(page.getRawText()).isEqualTo("foo");
} }
}); });
......
...@@ -2,13 +2,10 @@ package us.codecraft.webmagic.proxy; ...@@ -2,13 +2,10 @@ package us.codecraft.webmagic.proxy;
import org.apache.http.HttpHost; import org.apache.http.HttpHost;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/** /**
* @author yxssfxwzy@sina.com May 30, 2014 * @author yxssfxwzy@sina.com May 30, 2014
* *
...@@ -27,30 +24,6 @@ public class ProxyTest { ...@@ -27,30 +24,6 @@ public class ProxyTest {
} }
} }
/**
 * Checks that the pool starts with four idle proxies, then runs two rounds of:
 * take every idle proxy, start one {@code Fetch} thread per proxy, hand each
 * proxy back as {@code Proxy.SUCCESS}, and print the pool status.
 */
@Test
public void testProxy() {
    TimerReuseProxyPool pool = new TimerReuseProxyPool(httpProxyList, false);
    pool.setReuseInterval(500);
    assertThat(pool.getIdleNum()).isEqualTo(4);

    int round = 0;
    while (round < 2) {
        List<Fetch> started = new ArrayList<Fetch>();
        // Drain the pool, starting one worker per borrowed proxy.
        for (; pool.getIdleNum() != 0; ) {
            Proxy borrowed = pool.getProxy();
            HttpHost target = borrowed.getHttpHost();
            System.out.println(target.getHostName() + ":" + target.getPort());
            Fetch worker = new Fetch(target);
            worker.start();
            started.add(worker);
        }
        // Give every borrowed proxy back, reported as successful.
        for (int idx = 0; idx < started.size(); idx++) {
            pool.returnProxy(started.get(idx).hp, Proxy.SUCCESS);
        }
        System.out.println(pool.allProxyStatus());
        round++;
    }
}
class Fetch extends Thread { class Fetch extends Thread {
HttpHost hp; HttpHost hp;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment