Commit 65fe2c44, authored by Yihua Huang, committed by GitHub

Merge pull request #407 from jsbd/master

为PhantomJSDownloader添加新的构造函数,支持phantomjs自定义命令
parents fdf39eb9 ebc61363
...@@ -20,13 +20,32 @@ import java.io.*; ...@@ -20,13 +20,32 @@ import java.io.*;
public class PhantomJSDownloader extends AbstractDownloader { public class PhantomJSDownloader extends AbstractDownloader {
private static Logger logger = LoggerFactory.getLogger(PhantomJSDownloader.class); private static Logger logger = LoggerFactory.getLogger(PhantomJSDownloader.class);
private static String phantomJSPath; private static String crawlJsPath;
private static String phantomJsCommand = "phantomjs"; // default
private int retryNum; private int retryNum;
private int threadNum; private int threadNum;
public PhantomJSDownloader() { public PhantomJSDownloader() {
PhantomJSDownloader.phantomJSPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js "; this.initPhantomjsCrawlPath();
}
/**
 * Creates a downloader that launches PhantomJS with a caller-supplied command
 * instead of the default {@code "phantomjs"}.
 *
 * <p>Examples of {@code phantomJsCommand}:
 * <ul>
 * <li>{@code phantomjs.exe} - for Windows environments</li>
 * <li>{@code phantomjs --ignore-ssl-errors=yes} - ignores some of the errors
 * raised when the crawled address is https</li>
 * <li>{@code /usr/local/bin/phantomjs} - absolute path of the command, which
 * avoids an IOException caused by the system environment variables</li>
 * </ul>
 *
 * <p>NOTE(review): the command is stored in a static field, so the value passed
 * here replaces the command used by every instance of this class, not only the
 * one being constructed.
 *
 * @param phantomJsCommand the command line prefix used to start PhantomJS; it is
 *        later joined with a space to the crawl.js path and the request URL
 */
public PhantomJSDownloader(String phantomJsCommand) {
// Resolve the crawl.js location first, exactly as the no-arg constructor does.
this.initPhantomjsCrawlPath();
// Static assignment: overrides the class-wide default command.
PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
}
/**
 * Builds the path of the crawl.js script and stores it in the static
 * {@code crawlJsPath} field.
 *
 * <p>The path is the directory obtained from {@code getResource("/")} on this
 * object's class, followed by the platform file separator and the literal
 * {@code "crawl.js "}. The trailing space in that literal is what separates the
 * script path from the URL where the command line is assembled as
 * {@code crawlJsPath + url}.
 */
private void initPhantomjsCrawlPath() {
// Round-trips the resource path through File to obtain a platform-style directory path.
PhantomJSDownloader.crawlJsPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js ";
} }
@Override @Override
...@@ -67,7 +86,7 @@ public class PhantomJSDownloader extends AbstractDownloader { ...@@ -67,7 +86,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
try { try {
String url = request.getUrl(); String url = request.getUrl();
Runtime runtime = Runtime.getRuntime(); Runtime runtime = Runtime.getRuntime();
Process process = runtime.exec("phantomjs " + phantomJSPath + url); Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + url);
InputStream is = process.getInputStream(); InputStream is = process.getInputStream();
BufferedReader br = new BufferedReader(new InputStreamReader(is)); BufferedReader br = new BufferedReader(new InputStreamReader(is));
StringBuffer stringBuffer = new StringBuffer(); StringBuffer stringBuffer = new StringBuffer();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment