Commit fab44d3d authored by shenjunlin

修复一个空指针

parent 2b9ca61e
......@@ -6,7 +6,7 @@
<version>7</version>
</parent>
<groupId>us.codecraft.duiba</groupId>
<version>0.8.2-SNAPSHOT</version>
<version>0.8.3-SNAPSHOT</version>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<properties>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.2-SNAPSHOT</version>
<version>0.8.3-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.2-SNAPSHOT</version>
<version>0.8.3-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
package us.codecraft.webmagic.downloader;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
......@@ -9,6 +10,7 @@ import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.ProxyProvider;
import us.codecraft.webmagic.utils.ProxyUtils;
/**
* 获取含有代理的webDriver
......@@ -19,6 +21,7 @@ public class ProxyWebDriverFactory {
/**
 * Creates a new PhantomJS-backed {@link WebDriver}.
 *
 * The driver is configured with the capabilities produced by
 * {@code getDesiredCapabilities(proxyProvider, task)}.
 *
 * @param proxyProvider proxy source handed to {@code getDesiredCapabilities}
 * @param task          task handed to {@code getDesiredCapabilities}
 * @return a freshly constructed {@link PhantomJSDriver}
 */
public static WebDriver getProxyDriver(ProxyProvider proxyProvider, Task task){
    return new PhantomJSDriver(getDesiredCapabilities(proxyProvider, task));
}
......@@ -29,11 +32,16 @@ public class ProxyWebDriverFactory {
if (proxyProvider != null) {
Proxy proxy = proxyProvider.getProxy(task);
String proxyIpAndPort = proxy.getHost() + ":" + proxy.getPort();
logger.info("使用代理IP:{}", proxyIpAndPort);
boolean canUse = ProxyUtils.validateProxy(proxy);
logger.info("使用代理IP:{},是否可用{}", proxyIpAndPort, canUse);
// if (!canUse) {
// proxy = proxyProvider.getProxy(task);
// }
org.openqa.selenium.Proxy seleniumProxy = new org.openqa.selenium.Proxy();
seleniumProxy.setHttpProxy(proxyIpAndPort).setFtpProxy(proxyIpAndPort).setSslProxy(proxyIpAndPort);
cap.setCapability(CapabilityType.ForSeleniumServer.AVOIDING_PROXY, true);
cap.setCapability(CapabilityType.ForSeleniumServer.ONLY_PROXYING_SELENIUM_TRAFFIC, true);
// cap.setCapability(CapabilityType.ForSeleniumServer.AVOIDING_PROXY, true);
// cap.setCapability(CapabilityType.ForSeleniumServer.ONLY_PROXYING_SELENIUM_TRAFFIC, true);
System.setProperty("http.nonProxyHosts", "localhost");
cap.setCapability(CapabilityType.PROXY, seleniumProxy);
}
......
......@@ -185,7 +185,9 @@ public class SeleniumDownloader implements Downloader, Closeable {
/**
 * Closes all drivers held by the pool, if a pool exists.
 *
 * The pool reference can be null (presumably when the downloader is closed
 * before any pool was created — TODO confirm against the field's
 * initialization), so the call is made only inside the null check.
 *
 * @throws IOException declared by {@link Closeable#close()}
 */
@Override
public void close() throws IOException {
    // Only the guarded call is kept: calling closeAll() before the null
    // check would throw NullPointerException on a null pool and close the
    // pool twice otherwise.
    if (webDriverPool != null) {
        webDriverPool.closeAll();
    }
}
public void setProxyProvider(ProxyProvider proxyProvider) {
......
......@@ -116,7 +116,6 @@ public class WebDriverPool {
}
}
}
}
return innerQueue.take();
}
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.2-SNAPSHOT</version>
<version>0.8.3-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
package us.codecraft.webmagic.proxy;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Created by sunchangji on 2017/11/16.
*/
/**
 * Utility that downloads a plain-text list of proxy addresses from a remote
 * HTTP endpoint and converts each {@code host:port} line into a {@link Proxy}.
 *
 * Created by sunchangji on 2017/11/16.
 */
public class MailaProxyIpUtil {

    private MailaProxyIpUtil() {
    }

    private static final Logger LOGGER = LoggerFactory.getLogger(MailaProxyIpUtil.class);

    // NOTE(review): the "tid" query parameter looks like an account/order
    // identifier hard-coded in source — consider moving it to configuration.
    private static final String PROXY_IPS_URL = "http://pvt.daxiangdaili.com/ip/?tid=557577041643746&num=1000&delay=1" +
            "&sortby=time&protocol=https";

    /** Characters that separate entries in the response (each character is a separator for StringUtils.split). */
    private static final String SEPARATOR = "\r\n";

    /** Separator between host and port inside one entry. */
    private static final String SEPARATOR_IP = ":";

    /** Socket, connect and connection-request timeouts are all set to 10000. */
    private static final RequestConfig REQUEST_CONFIG = RequestConfig.custom()
            .setSocketTimeout(10000)
            .setConnectTimeout(10000)
            .setConnectionRequestTimeout(10000)
            .build();

    /**
     * Fetches the proxy list and converts it into {@link Proxy} objects.
     *
     * Entries that cannot be parsed as {@code host:port} (missing port,
     * empty host, non-numeric port) are skipped with a warning instead of
     * aborting the whole list with an exception.
     *
     * @return the parsed proxies; an empty array when the response is blank
     *         or contains no valid entry (never {@code null})
     */
    public static Proxy[] getProxyIps() {
        String result = sendHttpGet(PROXY_IPS_URL);
        if (StringUtils.isBlank(result)) {
            return new Proxy[]{};
        }
        String[] ips = StringUtils.split(result, SEPARATOR);
        Proxy[] proxies = new Proxy[ips.length];
        int count = 0;
        for (String ip : ips) {
            Proxy proxy = parseProxy(ip);
            if (proxy != null) {
                proxies[count++] = proxy;
            }
        }
        // Shrink the array when some entries were skipped so callers never see null slots.
        return count == proxies.length ? proxies : java.util.Arrays.copyOf(proxies, count);
    }

    /**
     * Parses one {@code host:port} entry.
     *
     * @param line a single entry from the response
     * @return the proxy, or {@code null} when the entry is malformed
     */
    private static Proxy parseProxy(String line) {
        String[] ipPort = line.trim().split(SEPARATOR_IP);
        if (ipPort.length < 2 || ipPort[0].trim().isEmpty()) {
            LOGGER.warn("Skip malformed proxy entry: {}", line);
            return null;
        }
        try {
            return new Proxy(ipPort[0].trim(), Integer.parseInt(ipPort[1].trim()));
        } catch (NumberFormatException e) {
            LOGGER.warn("Skip proxy entry with invalid port: {}", line);
            return null;
        }
    }

    /**
     * Sends a GET request and returns the response body decoded as UTF-8.
     *
     * @param httpUrl the URL to request
     * @return the response body, or an empty string when the request or the
     *         body decoding throws
     */
    private static String sendHttpGet(String httpUrl) {
        HttpGet httpGet = new HttpGet(httpUrl);
        httpGet.setConfig(REQUEST_CONFIG);
        String responseContent = "";
        // Both the client and the response are closed by try-with-resources.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            HttpEntity entity = response.getEntity();
            responseContent = EntityUtils.toString(entity, "UTF-8");
        } catch (Exception e) {
            // Best effort: a failed fetch yields an empty result rather than an exception.
            LOGGER.warn("HttpClients get proxy ip list error", e);
        }
        return responseContent;
    }
}
package us.codecraft.webmagic.proxy;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.SeleniumDownloader;
import us.codecraft.webmagic.downloader.WebDriverPool;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * Manual smoke test: crawls a single coupon page through a Chrome-type
 * {@link SeleniumDownloader}, optionally routed through the proxies returned
 * by {@link MailaProxyIpUtil#getProxyIps()}, and prints the page HTML.
 */
public class MonitorCouponTest {

    public static void main(String[] args) {
        Proxy[] proxies = MailaProxyIpUtil.getProxyIps();
        SeleniumDownloader seleniumDownloader = new SeleniumDownloader(WebDriverPool.DriverType.Chrome);

        // Install a proxy provider only when at least one proxy was fetched.
        boolean hasProxies = proxies != null && proxies.length > 0;
        if (hasProxies) {
            seleniumDownloader.setProxyProvider(SimpleProxyProvider.from(proxies));
        }

        Request couponRequest = new Request();
        couponRequest.setUrl("https://uland.taobao.com/coupon/edetail?e=%2Bt3mcdqXpCcGQASttHIRqUfQPq%2BJFja9CwpxDVVf0sIHyUnB597m48JdmzX2uTIj2NbK5%2FgosGvVeV%2FC36bhBsGSOsUw4E0fRtBXy%2FgzXjkFjaZhgpTjjWuFqp8TFaHM5HfRS%2B%2BJrK5WhTajwoPu9w%3D%3D&traceId=ac1d576815220294802996764d09e9&activityId=08ced324b6f04b87a9a59dd631e857bb");
        couponRequest.setMethod("get");

        // Processor that dumps the downloaded HTML to stdout.
        PageProcessor printingProcessor = new PageProcessor() {
            @Override
            public void process(Page page) {
                System.out.println(page.getHtml());
            }

            @Override
            public Site getSite() {
                Site site = Site.me();
                return site.setRetryTimes(1)
                        .setTimeOut(10000)
                        .setCharset("UTF-8")
                        .setSleepTime(2000);
            }
        };

        Spider spider = Spider.create(printingProcessor)
                .setDownloader(seleniumDownloader)
                .addRequest(couponRequest)
                .thread(1);
        spider.run();
    }
}
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.2-SNAPSHOT</version>
<version>0.8.3-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.8.2-SNAPSHOT</version>
<version>0.8.3-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment