Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
8b90b91e
Commit
8b90b91e
authored
Jan 21, 2016
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
complete some javadoc
parent
2b556cf0
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
40 additions
and
33 deletions
+40
-33
Formatter.java
...n/java/us/codecraft/forger/property/format/Formatter.java
+2
-2
Site.java
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+11
-6
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+10
-9
PageProcessor.java
...n/java/us/codecraft/webmagic/processor/PageProcessor.java
+1
-1
DuplicateRemover.java
...ecraft/webmagic/scheduler/component/DuplicateRemover.java
+2
-2
Json.java
...re/src/main/java/us/codecraft/webmagic/selector/Json.java
+1
-1
Selectable.java
.../main/java/us/codecraft/webmagic/selector/Selectable.java
+4
-4
Selectors.java
...c/main/java/us/codecraft/webmagic/selector/Selectors.java
+1
-0
UrlUtils.java
...e/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
+1
-1
Formatter.java
...ava/us/codecraft/webmagic/model/annotation/Formatter.java
+2
-2
ProcessorBenchmark.java
.../java/us/codecraft/webmagic/model/ProcessorBenchmark.java
+3
-3
XpathSelectorTest.java
...ava/us/codecraft/webmagic/selector/XpathSelectorTest.java
+2
-2
No files found.
webmagic-avalon/forger/src/main/java/us/codecraft/forger/property/format/Formatter.java
View file @
8b90b91e
...
...
@@ -22,9 +22,9 @@ public @interface Formatter {
String
[]
value
();
/**
* Specific the class of field of class of elements in collection for field. <br/>
* Specific the class of field of class of elements in collection for field. <br>
* It is not necessary to be set because we can detect the class by class of field,
* unless you use a collection as a field. <br/>
* unless you use a collection as a field. <br>
*
* @return the class of field
*/
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
View file @
8b90b91e
...
...
@@ -96,7 +96,7 @@ public class Site {
* @param domain domain
* @param name name
* @param value value
* @return
* @return this
*/
public
Site
addCookie
(
String
domain
,
String
name
,
String
value
)
{
cookies
.
put
(
domain
,
name
,
value
);
...
...
@@ -190,6 +190,7 @@ public class Site {
* set timeout for downloader in ms
*
* @param timeOut timeOut
* @return this
*/
public
Site
setTimeOut
(
int
timeOut
)
{
this
.
timeOut
=
timeOut
;
...
...
@@ -301,12 +302,12 @@ public class Site {
}
/**
* Put an Http header for downloader. <br/>
* Use {@link #addCookie(String, String)} for cookie and {@link #setUserAgent(String)} for user-agent. <br/>
* Put an Http header for downloader. <br>
* Use {@link #addCookie(String, String)} for cookie and {@link #setUserAgent(String)} for user-agent. <br>
*
* @param key key of http header, there are some keys constant in {@link HeaderConst}
* @param value value of header
* @return
* @return this
*/
public
Site
addHeader
(
String
key
,
String
value
)
{
headers
.
put
(
key
,
value
);
...
...
@@ -316,6 +317,7 @@ public class Site {
/**
* Set retry times when download fail, 0 by default.<br>
*
* @param retryTimes retryTimes
* @return this
*/
public
Site
setRetryTimes
(
int
retryTimes
)
{
...
...
@@ -335,6 +337,7 @@ public class Site {
/**
* Set cycleRetryTimes times when download fail, 0 by default. <br>
*
* @param cycleRetryTimes cycleRetryTimes
* @return this
*/
public
Site
setCycleRetryTimes
(
int
cycleRetryTimes
)
{
...
...
@@ -350,7 +353,7 @@ public class Site {
* set up httpProxy for this site
*
* @param httpProxy httpProxy
* @return
* @return this
*/
public
Site
setHttpProxy
(
HttpHost
httpProxy
)
{
this
.
httpProxy
=
httpProxy
;
...
...
@@ -369,6 +372,7 @@ public class Site {
* Set retry sleep times when download fail, 1000 by default. <br>
*
* @param retrySleepTime retrySleepTime
* @return this
*/
public
Site
setRetrySleepTime
(
int
retrySleepTime
)
{
this
.
retrySleepTime
=
retrySleepTime
;
...
...
@@ -380,7 +384,7 @@ public class Site {
* Default is true, you can set it to false to disable gzip.
*
* @param useGzip useGzip
* @return
* @return this
*/
public
Site
setUseGzip
(
boolean
useGzip
)
{
this
.
useGzip
=
useGzip
;
...
...
@@ -462,6 +466,7 @@ public class Site {
/**
* Set httpProxyPool, String[0]:ip, String[1]:port <br>
*
* @param httpProxyList httpProxyList
* @return this
*/
public
Site
setHttpProxyPool
(
List
<
String
[]>
httpProxyList
)
{
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
8b90b91e
...
...
@@ -466,10 +466,10 @@ public class Spider implements Runnable, Task {
}
/**
* Add urls to crawl. <br/>
* Add urls to crawl. <br>
*
* @param urls urls
* @return
* @return this
*/
public
Spider
addUrl
(
String
...
urls
)
{
for
(
String
url
:
urls
)
{
...
...
@@ -483,7 +483,7 @@ public class Spider implements Runnable, Task {
* Download urls synchronizing.
*
* @param urls urls
* @return
* @return list downloaded
*/
public
<
T
>
List
<
T
>
getAll
(
Collection
<
String
>
urls
)
{
destroyWhenExit
=
false
;
...
...
@@ -515,7 +515,7 @@ public class Spider implements Runnable, Task {
}
/**
* Add urls with information to crawl.<br/>
* Add urls with information to crawl.<br>
*
* @param requests requests
* @return
...
...
@@ -582,6 +582,7 @@ public class Spider implements Runnable, Task {
/**
* start with more than one threads
*
* @param executorService executorService to run the spider
* @param threadNum threadNum
* @return this
*/
...
...
@@ -599,12 +600,12 @@ public class Spider implements Runnable, Task {
}
/**
* Exit when complete. <br/>
* True: exit when all url of the site is downloaded. <br/>
* False: not exit until call stop() manually.<br/>
* Exit when complete. <br>
* True: exit when all url of the site is downloaded. <br>
* False: not exit until call stop() manually.<br>
*
* @param exitWhenComplete exitWhenComplete
* @return
* @return this
*/
public
Spider
setExitWhenComplete
(
boolean
exitWhenComplete
)
{
this
.
exitWhenComplete
=
exitWhenComplete
;
...
...
@@ -680,7 +681,7 @@ public class Spider implements Runnable, Task {
* DO NOT set it unless you know what it means!
*
* @param spawnUrl spawnUrl
* @return
* @return this
* @since 0.4.0
*/
public
Spider
setSpawnUrl
(
boolean
spawnUrl
)
{
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/processor/PageProcessor.java
View file @
8b90b91e
...
...
@@ -7,7 +7,7 @@ import us.codecraft.webmagic.Site;
* Interface to be implemented to customize a crawler.<br>
* <br>
* In PageProcessor, you can customize:
* <p/>
* <br>
* start urls and other settings in {@link Site}<br>
* how the urls to fetch are detected <br>
* how the data are extracted and stored <br>
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/component/DuplicateRemover.java
View file @
8b90b91e
...
...
@@ -15,7 +15,7 @@ public interface DuplicateRemover {
*
* @param request request
* @param task task
* @return
* @return true if is duplicate
*/
public
boolean
isDuplicate
(
Request
request
,
Task
task
);
...
...
@@ -28,7 +28,7 @@ public interface DuplicateRemover {
/**
* Get TotalRequestsCount for monitor.
* @param task task
* @return
* @return number of total request
*/
public
int
getTotalRequestsCount
(
Task
task
);
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Json.java
View file @
8b90b91e
...
...
@@ -23,7 +23,7 @@ public class Json extends PlainText {
/**
* remove padding for JSONP
* @param padding padding
* @return
* @return json after padding removed
*/
public
Json
removePadding
(
String
padding
)
{
String
text
=
getFirstSourceText
();
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
View file @
8b90b91e
...
...
@@ -124,7 +124,7 @@ public interface Selectable {
* extract by JSON Path expression
*
* @param jsonPath jsonPath
* @return
* @return result
*/
public
Selectable
jsonPath
(
String
jsonPath
);
...
...
@@ -132,7 +132,7 @@ public interface Selectable {
* extract by custom selector
*
* @param selector selector
* @return
* @return result
*/
public
Selectable
select
(
Selector
selector
);
...
...
@@ -140,13 +140,13 @@ public interface Selectable {
* extract by custom selector
*
* @param selector selector
* @return
* @return result
*/
public
Selectable
selectList
(
Selector
selector
);
/**
* get all nodes
* @return
* @return result
*/
public
List
<
Selectable
>
nodes
();
}
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectors.java
View file @
8b90b91e
...
...
@@ -35,6 +35,7 @@ public abstract class Selectors {
/**
* @Deprecated
* @see #xpath(String)
* @return new selector
*/
public
static
XpathSelector
xsoup
(
String
expr
)
{
return
new
XpathSelector
(
expr
);
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
View file @
8b90b91e
...
...
@@ -22,7 +22,7 @@ public class UrlUtils {
/**
* canonicalizeUrl
* <p/>
* <br>
* Borrowed from Jsoup.
*
* @param url url
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/Formatter.java
View file @
8b90b91e
...
...
@@ -24,9 +24,9 @@ public @interface Formatter {
String
[]
value
()
default
""
;
/**
* Specific the class of field of class of elements in collection for field. <br/>
* Specific the class of field of class of elements in collection for field. <br>
* It is not necessary to be set because we can detect the class by class of field,
* unless you use a collection as a field. <br/>
* unless you use a collection as a field. <br>
*
* @return the class of field
*/
...
...
webmagic-samples/src/test/java/us/codecraft/webmagic/model/ProcessorBenchmark.java
View file @
8b90b91e
...
...
@@ -150,7 +150,7 @@ public class ProcessorBenchmark {
"#MyResume textarea {width:170px;height:60px;font-size:9pt;}\n"
+
"</style>\n"
+
"<div class='Resume' id='MyResume'>\n"
+
"码农一枚<br
/>实用主义者<br/>抵制重复造轮子,却造了不少轮子<br/
>http://codecraft.us</div>\n"
+
"码农一枚<br
>实用主义者<br>抵制重复造轮子,却造了不少轮子<br
>http://codecraft.us</div>\n"
+
"<script type=\"text/javascript\" src=\"/js/2012/jquery.editinplace.js\"></script>\n"
+
"<script type=\"text/javascript\">\n"
+
"$(\"#MyResume\").editInPlace({\n"
+
...
...
@@ -453,7 +453,7 @@ public class ProcessorBenchmark {
"<div class=\"BlogCommentForm\">\n"
+
"\t<form id=\"form_inline_comment\" action=\"/action/blog/add_comment?blog=158200\" method=\"POST\">\n"
+
"\t <input type='hidden' id='inline_reply_id' name='reply_id' value=''/> \n"
+
" <textarea name=\"content\" style=\"width:550px;height:60px;\" onkeydown=\"if((event.metaKey || event.ctrlKey)&&event.keyCode==13){$('#form_inline_comment').submit();}\"></textarea><br
/
>\n"
+
" <textarea name=\"content\" style=\"width:550px;height:60px;\" onkeydown=\"if((event.metaKey || event.ctrlKey)&&event.keyCode==13){$('#form_inline_comment').submit();}\"></textarea><br>\n"
+
"\t <input type=\"submit\" value=\"回复\" id=\"btn_comment\" class=\"SUBMIT\"/> \n"
+
"\t <input type=\"button\" value=\"关闭\" class=\"SUBMIT\" id='btn_close_inline_reply'/> 文明上网,理性发言\n"
+
" </form>\n"
+
...
...
@@ -463,7 +463,7 @@ public class ProcessorBenchmark {
" <a name=\"comments\" id=\"postform\"></a>\n"
+
" <div class=\"BlogCommentForm\">\n"
+
" <form id=\"form_comment\" action=\"/action/blog/add_comment?blog=158200\" method=\"POST\"> \n"
+
" <textarea id='ta_post_content' name=\"content\" style=\"width:550px;height:100px;\" onkeydown=\"if((event.metaKey || event.ctrlKey)&&event.keyCode==13){$('#form_comment').submit();}\"></textarea><br
/
>\n"
+
" <textarea id='ta_post_content' name=\"content\" style=\"width:550px;height:100px;\" onkeydown=\"if((event.metaKey || event.ctrlKey)&&event.keyCode==13){$('#form_comment').submit();}\"></textarea><br>\n"
+
"\t <input type=\"submit\" value=\"发表评论\" id=\"btn_comment\" class=\"SUBMIT\" /> \n"
+
"\t <img id=\"submiting\" style=\"display:none\" src=\"/img/loading.gif\" align=\"absmiddle\"/>\n"
+
"\t <span id='cmt_tip'>文明上网,理性发言</span>\n"
+
...
...
webmagic-saxon/src/test/java/us/codecraft/webmagic/selector/XpathSelectorTest.java
View file @
8b90b91e
...
...
@@ -337,7 +337,7 @@ public class XpathSelectorTest {
+
"\t\t\t\t<textarea id='txt_answner' name='body' style='width:560px;height:160px;'></textarea>\n"
+
"\t\t\t\t<input type='submit' value=' 我要回答 ' id=\"FormSubmitButton\" class='rndbutton'/>\n"
+
"\t\t\t\t<span id='form_msg' style='display:none;'></span>\n"
+
"\t\t\t\t<br
/><br/
>\n"
+
"\t\t\t\t<br
><br
>\n"
+
"\t\t\t\t<a href=\"#answers\">回答案顶部</a> | <a href=\"#top\">回页面顶部</a>\n"
+
"\t\t\t</form>\n"
+
"\t\t\t<div class='clear'></div>\n"
...
...
@@ -822,7 +822,7 @@ public class XpathSelectorTest {
+
"\t}\n"
+
"}\n"
+
"\n"
+
"var favor_ok = \"<p class='favor_ok'>已成功添加到收藏夹<br
/><br/
> <a href='http://my.oschina.net/flashsword/favorites?type=$DAISY_OBJ_TYPE'>我的收藏夹</a> | <a href='javascript:close_favor()'>关闭</a></p>\";\n"
+
"var favor_ok = \"<p class='favor_ok'>已成功添加到收藏夹<br
><br
> <a href='http://my.oschina.net/flashsword/favorites?type=$DAISY_OBJ_TYPE'>我的收藏夹</a> | <a href='javascript:close_favor()'>关闭</a></p>\";\n"
+
"function delete_favor(obi_id, obj_type){\n"
+
"\tif(!confirm('确定取消收藏?')) return;\n"
+
"\t$.post(\"/action/favorite/cancel?type=\"+obj_type+\"&id=\"+obi_id+\"&user=190591\",function(html){\n"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment