Merge pull request #414 from jsbd/master

新增构造函数，支持crawl.js路径自定义，因为当其他项目依赖此jar包时，runtime.exec()执行phantomjs命令时无法使用jar包中的crawl.js

Merge pull request #414 from jsbd/master
新增构造函数，支持crawl.js路径自定义，因为当其他项目依赖此jar包时，runtime.exec()执行phantomjs命令时无法使用jar包中的crawl.js
f29a1047 · Yihua Huang · GitHub · 93e7040f · 6d78d51f · f29a1047
Commit f29a1047 authored Jan 21, 2017 by Yihua Huang Committed by GitHub Jan 21, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 48 additions and 10 deletions

PhantomJSDownloader.java ...us/codecraft/webmagic/downloader/PhantomJSDownloader.java +48 -10

No files found.
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
@@ -29,21 +29,59 @@ public class PhantomJSDownloader extends AbstractDownloader {
    public PhantomJSDownloader() {
        this.initPhantomjsCrawlPath();
    }
+    
    /**
-       * 添加新的构造函数，支持phantomjs自定义命令
-       * 
-       * example: 
-       *    phantomjs.exe 支持windows环境
-       *    phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误
-       *    /usr/local/bin/phantomjs 命令的绝对路径，避免因系统环境变量引起的IOException
-       *   
-       * @param phantomJsCommand phantomJsCommand
-       */
+     * 添加新的构造函数，支持phantomjs自定义命令
+     * 
+     * example: 
+     *    phantomjs.exe 支持windows环境
+     *    phantomjs --ignore-ssl-errors=yes 忽略抓取地址是https时的一些错误
+     *    /usr/local/bin/phantomjs 命令的绝对路径，避免因系统环境变量引起的IOException
+     *   
+     * @param phantomJsCommand
+     */
    public PhantomJSDownloader(String phantomJsCommand) {
        this.initPhantomjsCrawlPath();
        PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
    }
    
+    /**
+     * 新增构造函数，支持crawl.js路径自定义，因为当其他项目依赖此jar包时，runtime.exec()执行phantomjs命令时无法使用jar包中的crawl.js
+     * 
+     * crawl.js start -->>
+     * 
+     *   var system = require('system');
+     *   var url = system.args[1];
+     *   
+     *   var page = require('webpage').create();
+     *   page.settings.loadImages = false;
+     *   page.settings.resourceTimeout = 5000;
+     *   
+     *   page.open(url, function (status) {
+     *       if (status != 'success') {
+     *           console.log("HTTP request failed!");
+     *       } else {
+     *           console.log(page.content);
+     *       }
+     *   
+     *       page.close();
+     *       phantom.exit();
+     *   });
+     *   
+     * <<-- crawl.js end
+     * 具体项目时可以将以上js代码复制下来使用
+     *   
+     * example:
+     *    new PhantomJSDownloader("/your/path/phantomjs", "/your/path/crawl.js");
+     * 
+     * @param phantomJsCommand
+     * @param crawlJsPath
+     */
+    public PhantomJSDownloader(String phantomJsCommand, String crawlJsPath) {
+      PhantomJSDownloader.phantomJsCommand = phantomJsCommand;
+      PhantomJSDownloader.crawlJsPath = crawlJsPath;
+    }
+    
    private void initPhantomjsCrawlPath() {
        PhantomJSDownloader.crawlJsPath = new File(this.getClass().getResource("/").getPath()).getPath() + System.getProperty("file.separator") + "crawl.js ";
    }
@@ -86,7 +124,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
        try {
            String url = request.getUrl();
            Runtime runtime = Runtime.getRuntime();
-            Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + url);
+            Process process = runtime.exec(phantomJsCommand + " " + crawlJsPath + " " + url);
            InputStream is = process.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            StringBuffer stringBuffer = new StringBuffer();