Commit b1cba78b authored by yihua.huang

xsoup test

parent e87489d5
...@@ -17,6 +17,11 @@ ...@@ -17,6 +17,11 @@
<artifactId>webmagic-core</artifactId> <artifactId>webmagic-core</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>xsoup</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency> <dependency>
<groupId>net.sf.saxon</groupId> <groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId> <artifactId>Saxon-HE</artifactId>
......
package us.codecraft.webmagic.selector; package us.codecraft.webmagic.selector;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Ignore; import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import us.codecraft.xsoup.XPathEvaluator;
import us.codecraft.xsoup.Xsoup;
/** /**
* @author code4crafter@gmail.com <br> Date: 13-4-21 Time: 上午10:06 * @author code4crafter@gmail.com <br> Date: 13-4-21 Time: 上午10:06
...@@ -1353,6 +1360,7 @@ public class XpathSelectorTest { ...@@ -1353,6 +1360,7 @@ public class XpathSelectorTest {
Html html1 = new Html(html); Html html1 = new Html(html);
Assert.assertEquals("再次吐槽easyui", html1.xpath(".//*[@class='QTitle']/h1/a").toString()); Assert.assertEquals("再次吐槽easyui", html1.xpath(".//*[@class='QTitle']/h1/a").toString());
Assert.assertNotNull(html1.$("a[href]").xpath("//@href").all()); Assert.assertNotNull(html1.$("a[href]").xpath("//@href").all());
Selectors.xpath("/abc/").select("");
} }
@Test @Test
...@@ -1379,17 +1387,86 @@ public class XpathSelectorTest { ...@@ -1379,17 +1387,86 @@ public class XpathSelectorTest {
xpath2Selector.selectList(html); xpath2Selector.selectList(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis()-time);
XpathSelector xpathSelector = new XpathSelector("//a"); XpathSelector xpathSelector = new XpathSelector("//a");
time =System.currentTimeMillis(); time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
xpathSelector.selectList(html); xpathSelector.selectList(html);
} }
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis()-time);
time =System.currentTimeMillis(); time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
xpath2Selector.selectList(html); xpath2Selector.selectList(html);
} }
System.out.println(System.currentTimeMillis() - time);
CssSelector cssSelector = new CssSelector("a");
time =System.currentTimeMillis();
for (int i = 0; i < 1000; i++) {
cssSelector.selectList(html);
}
System.out.println("css "+(System.currentTimeMillis()-time));
}
/**
 * Manual micro-benchmark comparing HTML parsing and link selection across
 * HtmlCleaner, Jsoup and Xsoup. Every measurement runs 2000 iterations and
 * prints the elapsed wall-clock milliseconds to standard output; nothing is
 * asserted. The test is disabled through {@code @Ignore} with the reason
 * "take long time", so it only runs when enabled by hand.
 *
 * NOTE(review): {@code html} is not declared in this method; presumably it is
 * a fixture field of the enclosing test class - confirm.
 *
 * @throws XPatherException declared by the signature; presumably raised by
 *                          {@code TagNode.evaluateXPath} - confirm
 */
@Ignore("take long time")
@Test
public void parserPerformanceTest() throws XPatherException {
// Print the input length so the timings below can be related to document size.
System.out.println(html.length());
// Parse once with each library up front; the query loops below reuse these
// pre-built trees so that query time is measured separately from parse time.
HtmlCleaner htmlCleaner = new HtmlCleaner();
TagNode tagNode = htmlCleaner.clean(html);
Document document = Jsoup.parse(html);
// --- HtmlCleaner: parse time ---
long time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html);
}
System.out.println(System.currentTimeMillis()-time);
// --- HtmlCleaner: XPath "//a" on the already-parsed tree ---
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a");
}
System.out.println(System.currentTimeMillis()-time);
System.out.println("=============");
// --- Jsoup: parse time ---
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
Jsoup.parse(html);
}
System.out.println(System.currentTimeMillis()-time);
// --- Jsoup: CSS selector "a" on the already-parsed document ---
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
document.select("a");
}
System.out.println(System.currentTimeMillis()-time);
System.out.println("=============");
// --- HtmlCleaner again: same two measurements as the first section ---
// NOTE(review): the reason for the repeat is not stated; presumably it compares
// timings after the JVM has warmed up - confirm before removing.
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
htmlCleaner.clean(html);
}
// NOTE(review): the statement below appears twice on one line; this looks like a
// side-by-side diff rendering artifact rather than an intentional double print - confirm.
System.out.println(System.currentTimeMillis()-time); System.out.println(System.currentTimeMillis()-time);
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
tagNode.evaluateXPath("//a");
}
System.out.println(System.currentTimeMillis()-time);
System.out.println("=============");
// --- Xsoup: XPath "//a" compiled once outside the timed loop, then evaluated
// against the Jsoup document ---
XPathEvaluator compile = Xsoup.compile("//a");
time =System.currentTimeMillis();
for (int i = 0; i < 2000; i++) {
compile.evaluate(document);
}
System.out.println(System.currentTimeMillis()-time);
// NOTE(review): the doubled closing brace below also looks like a diff rendering artifact - confirm.
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment