Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
7edfa26f
Commit
7edfa26f
authored
Jan 21, 2016
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
complete javadoc
parent
8b90b91e
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
18 additions
and
16 deletions
+18
-16
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+2
-2
DuplicateRemovedScheduler.java
...decraft/webmagic/scheduler/DuplicateRemovedScheduler.java
+1
-1
Html.java
...re/src/main/java/us/codecraft/webmagic/selector/Html.java
+1
-1
HtmlNode.java
...rc/main/java/us/codecraft/webmagic/selector/HtmlNode.java
+1
-1
Selectors.java
...c/main/java/us/codecraft/webmagic/selector/Selectors.java
+1
-0
CountableThreadPool.java
...ava/us/codecraft/webmagic/thread/CountableThreadPool.java
+4
-4
UrlUtils.java
...e/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
+1
-1
PatternRequestMatcher.java
.../us/codecraft/webmagic/handler/PatternRequestMatcher.java
+1
-1
RequestMatcher.java
...in/java/us/codecraft/webmagic/handler/RequestMatcher.java
+2
-2
SubPipeline.java
.../main/java/us/codecraft/webmagic/handler/SubPipeline.java
+1
-1
OOSpider.java
...n/src/main/java/us/codecraft/webmagic/model/OOSpider.java
+1
-1
SpiderMonitor.java
...ain/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+1
-1
DoubleKeyMap.java
...c/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
+1
-0
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
7edfa26f
...
@@ -518,7 +518,7 @@ public class Spider implements Runnable, Task {
...
@@ -518,7 +518,7 @@ public class Spider implements Runnable, Task {
* Add urls with information to crawl.<br>
* Add urls with information to crawl.<br>
*
*
* @param requests requests
* @param requests requests
* @return
* @return this
*/
*/
public
Spider
addRequest
(
Request
...
requests
)
{
public
Spider
addRequest
(
Request
...
requests
)
{
for
(
Request
request
:
requests
)
{
for
(
Request
request
:
requests
)
{
...
@@ -730,7 +730,7 @@ public class Spider implements Runnable, Task {
...
@@ -730,7 +730,7 @@ public class Spider implements Runnable, Task {
}
}
/**
/**
* Set wait time when no url is polled.<br></br>
* Set wait time when no url is polled.<br><br>
*
*
* @param emptySleepTime In MILLISECONDS.
* @param emptySleepTime In MILLISECONDS.
*/
*/
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/DuplicateRemovedScheduler.java
View file @
7edfa26f
...
@@ -8,7 +8,7 @@ import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
...
@@ -8,7 +8,7 @@ import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
import
us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover
;
import
us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover
;
/**
/**
* Remove duplicate urls and only push urls which are not duplicate.<br></br>
* Remove duplicate urls and only push urls which are not duplicate.<br><br>
*
*
* @author code4crafer@gmail.com
* @author code4crafer@gmail.com
* @since 0.5.0
* @since 0.5.0
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
View file @
7edfa26f
...
@@ -69,7 +69,7 @@ public class Html extends HtmlNode {
...
@@ -69,7 +69,7 @@ public class Html extends HtmlNode {
/**
/**
* @param selector selector
* @param selector selector
* @return
* @return result
*/
*/
public
String
selectDocument
(
Selector
selector
)
{
public
String
selectDocument
(
Selector
selector
)
{
if
(
selector
instanceof
ElementSelector
)
{
if
(
selector
instanceof
ElementSelector
)
{
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/HtmlNode.java
View file @
7edfa26f
...
@@ -60,7 +60,7 @@ public class HtmlNode extends AbstractSelectable {
...
@@ -60,7 +60,7 @@ public class HtmlNode extends AbstractSelectable {
* select elements
* select elements
*
*
* @param elementSelector elementSelector
* @param elementSelector elementSelector
* @return
* @return result
*/
*/
protected
Selectable
selectElements
(
BaseElementSelector
elementSelector
)
{
protected
Selectable
selectElements
(
BaseElementSelector
elementSelector
)
{
ListIterator
<
Element
>
elementIterator
=
getElements
().
listIterator
();
ListIterator
<
Element
>
elementIterator
=
getElements
().
listIterator
();
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectors.java
View file @
7edfa26f
...
@@ -35,6 +35,7 @@ public abstract class Selectors {
...
@@ -35,6 +35,7 @@ public abstract class Selectors {
/**
/**
* @Deprecated
* @Deprecated
* @see #xpath(String)
* @see #xpath(String)
* @param expr expr
* @return new selector
* @return new selector
*/
*/
public
static
XpathSelector
xsoup
(
String
expr
)
{
public
static
XpathSelector
xsoup
(
String
expr
)
{
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/thread/CountableThreadPool.java
View file @
7edfa26f
...
@@ -7,10 +7,10 @@ import java.util.concurrent.locks.Condition;
...
@@ -7,10 +7,10 @@ import java.util.concurrent.locks.Condition;
import
java.util.concurrent.locks.ReentrantLock
;
import
java.util.concurrent.locks.ReentrantLock
;
/**
/**
* Thread pool for workers.<br></br>
* Thread pool for workers.<br><br>
* Use {@link java.util.concurrent.ExecutorService} as inner implement. <br></br>
* Use {@link java.util.concurrent.ExecutorService} as inner implement. <br><br>
* New feature: <br></br>
* New feature: <br><br>
* 1. Block when thread pool is full to avoid poll many urls without process. <br></br>
* 1. Block when thread pool is full to avoid poll many urls without process. <br><br>
* 2. Count of thread alive for monitor.
* 2. Count of thread alive for monitor.
*
*
* @author code4crafer@gmail.com
* @author code4crafer@gmail.com
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
View file @
7edfa26f
...
@@ -52,7 +52,7 @@ public class UrlUtils {
...
@@ -52,7 +52,7 @@ public class UrlUtils {
/**
/**
*
*
* @param url url
* @param url url
* @return
* @return new url
*/
*/
public
static
String
encodeIllegalCharacterInUrl
(
String
url
)
{
public
static
String
encodeIllegalCharacterInUrl
(
String
url
)
{
//TODO more charator support
//TODO more charator support
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/handler/PatternRequestMatcher.java
View file @
7edfa26f
...
@@ -9,7 +9,7 @@ import java.util.regex.Pattern;
...
@@ -9,7 +9,7 @@ import java.util.regex.Pattern;
* User: Sebastian MA
* User: Sebastian MA
* Date: April 03, 2014
* Date: April 03, 2014
* Time: 10:00
* Time: 10:00
* <p>
</p>
* <p>
* A PatternHandler is in charge of both page extraction and data processing by implementing
* A PatternHandler is in charge of both page extraction and data processing by implementing
* its two abstract methods.
* its two abstract methods.
*/
*/
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/handler/RequestMatcher.java
View file @
7edfa26f
...
@@ -9,12 +9,12 @@ import us.codecraft.webmagic.Request;
...
@@ -9,12 +9,12 @@ import us.codecraft.webmagic.Request;
public
interface
RequestMatcher
{
public
interface
RequestMatcher
{
/**
/**
* Check whether to process the page.<br></br>
* Check whether to process the page.<br><br>
* Please DO NOT change page status in this method.
* Please DO NOT change page status in this method.
*
*
* @param page page
* @param page page
*
*
* @return
* @return whether matches
*/
*/
public
boolean
match
(
Request
page
);
public
boolean
match
(
Request
page
);
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/handler/SubPipeline.java
View file @
7edfa26f
...
@@ -12,7 +12,7 @@ public interface SubPipeline extends RequestMatcher {
...
@@ -12,7 +12,7 @@ public interface SubPipeline extends RequestMatcher {
/**
/**
* process the page, extract urls to fetch, extract the data and store
* process the page, extract urls to fetch, extract the data and store
*
*
* @param page page
* @param resultItems resultItems
* @param task task
* @param task task
* @return whether continue to match
* @return whether continue to match
*/
*/
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/model/OOSpider.java
View file @
7edfa26f
...
@@ -25,7 +25,7 @@ import java.util.List;
...
@@ -25,7 +25,7 @@ import java.util.List;
* private String content;
* private String content;
*
*
* {@literal @}ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
* {@literal @}ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
* private List<String> tags;
* private List<String> tags;
* }
* }
* </pre>
* </pre>
* And start the spider by:
* And start the spider by:
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
View file @
7edfa26f
...
@@ -43,7 +43,7 @@ public class SpiderMonitor {
...
@@ -43,7 +43,7 @@ public class SpiderMonitor {
* Register spider for monitor.
* Register spider for monitor.
*
*
* @param spiders spiders
* @param spiders spiders
* @return
* @return this
*/
*/
public
synchronized
SpiderMonitor
register
(
Spider
...
spiders
)
throws
JMException
{
public
synchronized
SpiderMonitor
register
(
Spider
...
spiders
)
throws
JMException
{
for
(
Spider
spider
:
spiders
)
{
for
(
Spider
spider
:
spiders
)
{
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
View file @
7edfa26f
...
@@ -30,6 +30,7 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
...
@@ -30,6 +30,7 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
/**
/**
* init map with protoMapClass
* init map with protoMapClass
*
*
* @param map the origin map to contains the DoubleKeyMap
* @param protoMapClass protoMapClass
* @param protoMapClass protoMapClass
*/
*/
@SuppressWarnings
(
"rawtypes"
)
@SuppressWarnings
(
"rawtypes"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment