Commit 956d5cb3 authored by yihua.huang

docs

parent fb0797b6
......@@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap;
* Main methods:
* {@link #getUrl()} gets the url of the page
* {@link #getHtml()} gets the html content of the page
* {@link #putField(String, us.codecraft.webmagic.selector.Selectable)} saves an extraction result
* {@link #getFields()} gets the extraction results; called from {@link us.codecraft.webmagic.pipeline.Pipeline}
* {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} add links to be crawled
*
* </pre>
......@@ -33,25 +35,30 @@ public class Page {
private List<Request> targetRequests = new ArrayList<Request>();
/**
 * Stores one entry under the key {@code "title"} in the extraction results.
 * The value is whatever {@code html.x("").r("")} yields.
 *
 * NOTE(review): both selector arguments are empty strings, which looks like
 * placeholder/sample code -- confirm the intended expressions.
 */
public void process() {
    // Evaluate the selector chain first, then record it under the fixed key.
    final Selectable title = this.html.x("").r("");
    this.fields.put("title", title);
}
/**
 * Creates an empty page; no state beyond the field initializers is set here.
 */
public Page() {
    super();
}
/**
 * Gets the extraction results. According to the original note, this is
 * called from {@link us.codecraft.webmagic.pipeline.Pipeline}.
 *
 * @return the extraction results (the internal map itself, not a copy)
 */
public Map<String, Selectable> getFields() {
    return this.fields;
}
/**
 * Saves one extraction result.
 *
 * @param key   the key of the result
 * @param field the value of the result
 */
public void putField(String key, Selectable field) {
    this.fields.put(key, field);
}
/**
 * Gets the html content of the page.
 *
 * @return the html content of the page
 */
public Selectable getHtml() {
    return this.html;
}
......@@ -64,6 +71,10 @@ public class Page {
return targetRequests;
}
/**
* Adds links to be crawled.
* @param requests the links to be crawled
*/
public void addTargetRequests(List<String> requests) {
synchronized (targetRequests) {
for (String s : requests) {
......@@ -76,6 +87,10 @@ public class Page {
}
}
/**
* Adds a link to be crawled.
* @param requestString the link to be crawled
*/
public void addTargetRequest(String requestString) {
if (StringUtils.isBlank(requestString) || requestString.equals("#")) {
return;
......@@ -86,20 +101,36 @@ public class Page {
}
}
/**
 * Adds a page to be crawled. Use this overload when additional information
 * needs to be passed along with the request.
 *
 * @param request the page to be crawled
 */
public void addTargetRequest(Request request) {
    // The append happens while holding the monitor of the request list itself.
    synchronized (this.targetRequests) {
        this.targetRequests.add(request);
    }
}
/**
 * Gets the url of the page.
 *
 * @return the url of the current page, which can be used for extraction
 */
public Selectable getUrl() {
    return this.url;
}
/**
 * Sets the url of this page.
 *
 * @param url the url to store on this page
 */
public void setUrl(Selectable url) {
this.url = url;
}
/**
 * Gets the crawl request.
 *
 * @return the crawl request
 */
public Request getRequest() {
    return this.request;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment