Commit 7a64847a authored by yihua.huang's avatar yihua.huang

Bugfix: selector does not work well in element #113

parent 8d67fd03
package us.codecraft.webmagic.selector;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
/**
* @author code4crafer@gmail.com
......@@ -48,9 +50,11 @@ public class HtmlNode extends AbstractSelectable {
* @return
*/
protected Selectable selectElements(BaseElementSelector elementSelector) {
ListIterator<Element> elementIterator = getElements().listIterator();
if (!elementSelector.hasAttribute()) {
List<Element> resultElements = new ArrayList<Element>();
for (Element element : getElements()) {
while (elementIterator.hasNext()) {
Element element = checkElementAndConvert(elementIterator);
List<Element> selectElements = elementSelector.selectElements(element);
resultElements.addAll(selectElements);
}
......@@ -58,7 +62,8 @@ public class HtmlNode extends AbstractSelectable {
} else {
// has attribute, consider as plaintext
List<String> resultStrings = new ArrayList<String>();
for (Element element : getElements()) {
while (elementIterator.hasNext()) {
Element element = checkElementAndConvert(elementIterator);
List<String> selectList = elementSelector.selectList(element);
resultStrings.addAll(selectList);
}
......@@ -67,6 +72,25 @@ public class HtmlNode extends AbstractSelectable {
}
}
/**
 * Returns an element that can be used as the root of a selection, wrapping it
 * in a new {@link Document} when it is not one already.
 * <p>
 * Only a document can be selected, so any element that is not a
 * {@link Document} is cloned into a new document, and the list entry that the
 * iterator just returned is replaced with that document.
 * See: https://github.com/code4craft/webmagic/issues/113
 *
 * @param elementIterator iterator over the elements; it is advanced by one
 *                        position, and the entry it returned is replaced via
 *                        {@link ListIterator#set(Object)} when wrapping occurs
 * @return the element itself if it is already a {@link Document}, otherwise
 *         the new document containing a clone of the element
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
    Element element = elementIterator.next();
    if (element instanceof Document) {
        return element;
    }
    // ownerDocument() yields null for an element that is not attached to any
    // document; fall back to the element's own base URI instead of throwing.
    Document owner = element.ownerDocument();
    String baseUri = owner != null ? owner.baseUri() : element.baseUri();
    Document root = new Document(baseUri);
    // A clone is appended rather than the element itself, leaving the
    // original element where it is.
    root.appendChild(element.clone());
    elementIterator.set(root);
    return root;
}
@Override
public Selectable $(String selector) {
CssSelector cssSelector = Selectors.$(selector);
......
......@@ -28,7 +28,6 @@ public class SelectorTest {
// Selects every <a> node via XPath, then checks the href attribute of the
// first two nodes and the first link extracted from the first node.
public void testNodes() throws Exception {
Html selectable = new Html(html);
List<Selectable> links = selectable.xpath("//a").nodes();
assertThat(links.get(0).xpath("/@href").get()).isEqualTo("http://whatever.com/aaa");
assertThat(links.get(1).xpath("/@href").get()).isEqualTo("http://whatever.com/bbb");
assertThat(links.get(0).links().get()).isEqualTo("http://whatever.com/aaa");
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment