Commit 362fdd06 authored by yihua.huang

Merge branch 'master' of github.com:code4craft/webmagic

parents af809c4d 7e8a5c7d
target/*
target
*.iml
out/
.idea
......@@ -127,3 +127,7 @@ To write webmagic, I referred to the projects below :
### Mail-list:
[https://groups.google.com/forum/#!forum/webmagic-java](https://groups.google.com/forum/#!forum/webmagic-java)
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/code4craft/webmagic/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
......@@ -126,7 +126,7 @@ public class FileCacheQueueScheduler implements Scheduler {
private void readCursorFile() throws IOException {
BufferedReader fileCursorReader = null;
try {
new BufferedReader(new FileReader(getFileName(fileCursor)));
fileCursorReader = new BufferedReader(new FileReader(getFileName(fileCursor)));
String line;
//read the last number
while ((line = fileCursorReader.readLine()) != null) {
......
......@@ -84,6 +84,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
WebElement webElement = webDriver.findElement(By.xpath("/html"));
String content = webElement.getAttribute("outerHTML");
Page page = new Page();
page.setRawText(content);
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment