Commit 362fdd06 authored by yihua.huang

Merge branch 'master' of github.com:code4craft/webmagic

parents af809c4d 7e8a5c7d
target/* target
*.iml *.iml
out/ out/
.idea .idea
...@@ -127,3 +127,7 @@ To write webmagic, I referred to the projects below : ...@@ -127,3 +127,7 @@ To write webmagic, I referred to the projects below :
### Mail-list: ### Mail-list:
[https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java) [https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java)
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/code4craft/webmagic/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
...@@ -126,7 +126,7 @@ public class FileCacheQueueScheduler implements Scheduler { ...@@ -126,7 +126,7 @@ public class FileCacheQueueScheduler implements Scheduler {
private void readCursorFile() throws IOException { private void readCursorFile() throws IOException {
BufferedReader fileCursorReader = null; BufferedReader fileCursorReader = null;
try { try {
new BufferedReader(new FileReader(getFileName(fileCursor))); fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor)));
String line; String line;
//read the last number //read the last number
while ((line = fileCursorReader.readLine()) != null) { while ((line = fileCursorReader.readLine()) != null) {
......
...@@ -84,6 +84,7 @@ public class SeleniumDownloader implements Downloader, Closeable { ...@@ -84,6 +84,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
WebElement webElement = webDriver.findElement(By.xpath("/html")); WebElement webElement = webDriver.findElement(By.xpath("/html"));
String content = webElement.getAttribute("outerHTML"); String content = webElement.getAttribute("outerHTML");
Page page = new Page(); Page page = new Page();
page.setRawText(content);
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl())); page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request); page.setRequest(request);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment