Commit 956d5cb3 authored by yihua.huang

docs

parent fb0797b6
...@@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap; ...@@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap;
* 主要方法: * 主要方法:
* {@link #getUrl()} 获取页面的Url * {@link #getUrl()} 获取页面的Url
* {@link #getHtml()} 获取页面的html内容 * {@link #getHtml()} 获取页面的html内容
* {@link #putField(String, us.codecraft.webmagic.selector.Selectable)} 保存抽取的结果
* {@link #getFields()} 获取抽取的结果,在 {@link us.codecraft.webmagic.pipeline.Pipeline} 中调用
* {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} 添加待抓取的链接 * {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} 添加待抓取的链接
* *
* </pre> * </pre>
...@@ -33,25 +35,30 @@ public class Page { ...@@ -33,25 +35,30 @@ public class Page {
private List<Request> targetRequests = new ArrayList<Request>(); private List<Request> targetRequests = new ArrayList<Request>();
public void process() {
fields.put("title", html.x("").r(""));
}
public Page() { public Page() {
} }
/** /**
* * 获取抽取的结果,在{@link us.codecraft.webmagic.pipeline.Pipeline} 中调用
* @return fields * @return fields 抽取的结果
*/ */
public Map<String, Selectable> getFields() { public Map<String, Selectable> getFields() {
return fields; return fields;
} }
/**
* 保存抽取的结果
* @param key 结果的key
* @param field 结果的value
*/
public void putField(String key, Selectable field) { public void putField(String key, Selectable field) {
fields.put(key, field); fields.put(key, field);
} }
/**
* 获取页面的html内容
* @return html 页面的html内容
*/
public Selectable getHtml() { public Selectable getHtml() {
return html; return html;
} }
...@@ -64,6 +71,10 @@ public class Page { ...@@ -64,6 +71,10 @@ public class Page {
return targetRequests; return targetRequests;
} }
/**
* 添加待抓取的链接
* @param requests 待抓取的链接
*/
public void addTargetRequests(List<String> requests) { public void addTargetRequests(List<String> requests) {
synchronized (targetRequests) { synchronized (targetRequests) {
for (String s : requests) { for (String s : requests) {
...@@ -76,6 +87,10 @@ public class Page { ...@@ -76,6 +87,10 @@ public class Page {
} }
} }
/**
* 添加待抓取的链接
* @param requestString 待抓取的链接
*/
public void addTargetRequest(String requestString) { public void addTargetRequest(String requestString) {
if (StringUtils.isBlank(requestString) || requestString.equals("#")) { if (StringUtils.isBlank(requestString) || requestString.equals("#")) {
return; return;
...@@ -86,20 +101,36 @@ public class Page { ...@@ -86,20 +101,36 @@ public class Page {
} }
} }
/**
* 添加待抓取的页面,在需要传递附加信息时使用
* @param request 待抓取的页面
*/
public void addTargetRequest(Request request) { public void addTargetRequest(Request request) {
synchronized (targetRequests) { synchronized (targetRequests) {
targetRequests.add(request); targetRequests.add(request);
} }
} }
/**
* 获取页面的Url
* @return url 当前页面的url,可用于抽取
*/
public Selectable getUrl() { public Selectable getUrl() {
return url; return url;
} }
/**
* 设置url
* @param url
*/
public void setUrl(Selectable url) { public void setUrl(Selectable url) {
this.url = url; this.url = url;
} }
/**
* 获取抓取请求
* @return request 抓取请求
*/
public Request getRequest() { public Request getRequest() {
return request; return request;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment