Commit e7668e01 authored by yihua.huang

fix SourceRegion error and add some tests on it #144

parent 4e5ba020
...@@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable { ...@@ -43,6 +43,19 @@ public class HtmlNode extends AbstractSelectable {
return selectElements(xpathSelector); return selectElements(xpathSelector);
} }
/**
 * Applies {@code selector} to this node.
 * <p>
 * A {@link BaseElementSelector} is handed to {@code selectElements}; any other
 * selector is applied to the texts returned by {@code getSourceTexts()}.
 *
 * @param selector the selector to apply
 * @return the selection produced by the chosen path
 */
@Override
public Selectable selectList(Selector selector) {
    if (!(selector instanceof BaseElementSelector)) {
        return selectList(selector, getSourceTexts());
    }
    BaseElementSelector elementSelector = (BaseElementSelector) selector;
    return selectElements(elementSelector);
}
/**
 * Selects with {@code selector} by delegating to {@link #selectList(Selector)}.
 *
 * @param selector the selector to apply
 * @return whatever {@code selectList(selector)} returns
 */
@Override
public Selectable select(Selector selector) {
    final Selectable selected = selectList(selector);
    return selected;
}
/** /**
* select elements * select elements
* *
......
package us.codecraft.webmagic.model; package us.codecraft.webmagic.model;
import junit.framework.Assert;
import org.junit.Test; import org.junit.Test;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task; import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.MockGithubDownloader;
import us.codecraft.webmagic.pipeline.PageModelPipeline; import us.codecraft.webmagic.pipeline.PageModelPipeline;
import static org.assertj.core.api.Assertions.assertThat;
/** /**
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
*/ */
...@@ -14,13 +15,13 @@ public class GithubRepoTest { ...@@ -14,13 +15,13 @@ public class GithubRepoTest {
@Test @Test
public void test() { public void test() {
OOSpider.create(Site.me().addStartUrl("https://github.com/code4craft/webmagic").setSleepTime(0) OOSpider.create(Site.me().setSleepTime(0)
, new PageModelPipeline<GithubRepo>() { , new PageModelPipeline<GithubRepo>() {
@Override @Override
public void process(GithubRepo o, Task task) { public void process(GithubRepo o, Task task) {
Assert.assertEquals(86, o.getStar()); assertThat(o.getStar()).isEqualTo(86);
Assert.assertEquals(70, o.getFork()); assertThat(o.getFork()).isEqualTo(70);
} }
}, GithubRepo.class).setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic"); }, GithubRepo.class).addUrl("https://github.com/code4craft/webmagic").setDownloader(new MockGithubDownloader()).test("https://github.com/code4craft/webmagic");
} }
} }
package us.codecraft.webmagic.model;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
/**
 * Field-less model used as a test fixture; it only carries URL annotations.
 * <p>
 * {@code TargetUrl} is declared for post URLs with the source region
 * {@code //li[@class='post']}, and {@code HelpUrl} for list URLs with the
 * source region {@code //li[@class='list']}.
 *
 * @author code4crafter@gmail.com
 */
@TargetUrl(value = "http://webmagic.io/post/\\d+",sourceRegion = "//li[@class='post']")
@HelpUrl(value = "http://webmagic.io/list/\\d+",sourceRegion = "//li[@class='list']")
public class MockModel {
}
package us.codecraft.webmagic.model; package us.codecraft.webmagic.model;
import org.apache.commons.io.IOUtils;
import org.junit.Test; import org.junit.Test;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Request;
...@@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy; ...@@ -7,6 +8,8 @@ import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl; import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.selector.PlainText;
import java.io.IOException;
import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThat;
/** /**
...@@ -40,6 +43,22 @@ public class ModelPageProcessorTest { ...@@ -40,6 +43,22 @@ public class ModelPageProcessorTest {
ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class); ModelPageProcessor modelPageProcessor = ModelPageProcessor.create(null, ModelFoo.class, ModelBar.class);
modelPageProcessor.process(page); modelPageProcessor.process(page);
assertThat(page.getResultItems().isSkip()).isFalse(); assertThat(page.getResultItems().isSkip()).isFalse();
}
/**
 * Runs the processor built for {@link MockModel} over the mock page and checks
 * the exact sequence of target requests queued on that page.
 */
@Test
public void testExtractLinks() throws Exception {
    ModelPageProcessor processor = ModelPageProcessor.create(null, MockModel.class);
    Page mockPage = getMockPage();
    processor.process(mockPage);
    assertThat(mockPage.getTargetRequests()).containsExactly(
            new Request("http://webmagic.io/list/1"),
            new Request("http://webmagic.io/list/2"),
            new Request("http://webmagic.io/post/1"),
            new Request("http://webmagic.io/post/2"));
}
private Page getMockPage() throws IOException {
Page page = new Page();
page.setRawText(IOUtils.toString(getClass().getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
page.setRequest(new Request("http://webmagic.io/list/0"));
page.setUrl(new PlainText("http://webmagic.io/list/0"));
return page;
} }
} }
<!DOCTYPE html>
<html>
<head lang="zh">
<meta charset="UTF-8">
<title></title>
</head>
<body>
<!-- Fixture for ModelPageProcessorTest.testExtractLinks. MockModel names li.list as its HelpUrl source region and li.post as its TargetUrl source region. -->
<ul>
<!-- list/1 and list/2 are among the requests the test expects; post/3 and post/4 are not, presumably because they do not match the list pattern applied to this region. -->
<li class="list"><a href="http://webmagic.io/list/1"></a></li>
<li class="list"><a href="http://webmagic.io/list/2"></a></li>
<li class="list"><a href="http://webmagic.io/post/3"></a></li>
<li class="list"><a href="http://webmagic.io/post/4"></a></li>
</ul>
<ul>
<!-- post/1 and post/2 are among the requests the test expects; list/3 and list/4 are not, presumably because they do not match the post pattern applied to this region. -->
<li class="post"><a href="http://webmagic.io/post/1"></a></li>
<li class="post"><a href="http://webmagic.io/post/2"></a></li>
<li class="post"><a href="http://webmagic.io/list/3"></a></li>
<li class="post"><a href="http://webmagic.io/list/4"></a></li>
</ul>
</body>
</html>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment