Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
827972d8
Commit
827972d8
authored
Jul 24, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update java docs
parent
96454fd7
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
13 additions
and
4 deletions
+13
-4
Scheduler.java
.../main/java/us/codecraft/webmagic/schedular/Scheduler.java
+1
-1
Html.java
...re/src/main/java/us/codecraft/webmagic/selector/Html.java
+1
-0
PlainText.java
...c/main/java/us/codecraft/webmagic/selector/PlainText.java
+1
-0
RegexSelector.java
...in/java/us/codecraft/webmagic/selector/RegexSelector.java
+1
-0
ReplaceSelector.java
.../java/us/codecraft/webmagic/selector/ReplaceSelector.java
+1
-0
Selectable.java
.../main/java/us/codecraft/webmagic/selector/Selectable.java
+3
-2
Selector.java
...rc/main/java/us/codecraft/webmagic/selector/Selector.java
+1
-0
SelectorFactory.java
.../java/us/codecraft/webmagic/selector/SelectorFactory.java
+1
-0
ThreadUtils.java
...rc/main/java/us/codecraft/webmagic/utils/ThreadUtils.java
+1
-0
UrlUtils.java
...e/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
+2
-1
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/schedular/Scheduler.java
View file @
827972d8
...
...
@@ -22,7 +22,7 @@ public interface Scheduler {
/**
* 返回下一个要抓取的链接
* @param task 定义的任务,以满足单Scheduler多Task的情况
- * @return
+ * @return 下一个要抓取的链接
*/
public
Request
poll
(
Task
task
);
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
View file @
827972d8
...
...
@@ -4,6 +4,7 @@ import java.util.ArrayList;
import
java.util.List
;
/**
* 可抽取的html文本。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 上午7:54
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
View file @
827972d8
...
...
@@ -6,6 +6,7 @@ import java.util.ArrayList;
import
java.util.List
;
/**
* 可抽取的纯文本,不包括xpath和css selector实现。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 上午7:54
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java
View file @
827972d8
...
...
@@ -9,6 +9,7 @@ import java.util.regex.Pattern;
import
java.util.regex.PatternSyntaxException
;
/**
* 正则表达式抽取器。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 上午7:09
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/ReplaceSelector.java
View file @
827972d8
...
...
@@ -6,6 +6,7 @@ import java.util.regex.Pattern;
import
java.util.regex.PatternSyntaxException
;
/**
* 对文本进行替换。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 上午7:09
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
View file @
827972d8
...
...
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.selector;
import
java.util.List
;
/**
* 可进行抽取的文本。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-20
* Time: 下午7:51
...
...
@@ -20,8 +21,8 @@ public interface Selectable {
/**
* select list with css selector
*
- * @param
- * @return
+ * @param selector css selector expression
+ * @return new Selectable after extract
*/
public
Selectable
$
(
String
selector
);
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java
View file @
827972d8
...
...
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.selector;
import
java.util.List
;
/**
* 抽取器。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-20
* Time: 下午8:02
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
View file @
827972d8
...
...
@@ -7,6 +7,7 @@ import java.util.Map;
import
java.util.concurrent.ConcurrentHashMap
;
/**
* 产生selector的工厂。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 上午7:56
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/utils/ThreadUtils.java
View file @
827972d8
...
...
@@ -6,6 +6,7 @@ import java.util.concurrent.ThreadPoolExecutor;
import
java.util.concurrent.TimeUnit
;
/**
* 线程工具类。<br>
* @author code4crafter@gmail.com
* Date: 13-6-23
* Time: 下午7:11
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java
View file @
827972d8
...
...
@@ -6,6 +6,7 @@ import java.util.regex.Matcher;
import
java.util.regex.Pattern
;
/**
* url及html处理工具类。<br>
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 下午1:52
...
...
@@ -18,7 +19,7 @@ public class UrlUtils {
* 将url相对地址转化为绝对地址
* @param url url地址
* @param refer url地址来自哪个页面
- * @return
+ * @return url绝对地址
*/
public
static
String
canonicalizeUrl
(
String
url
,
String
refer
)
{
if
(
StringUtils
.
isBlank
(
url
)
||
StringUtils
.
isBlank
(
refer
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment