Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
c51ac601
Commit
c51ac601
authored
Mar 20, 2017
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
remove Site.addStartRequest() etc. #494
parent
68050fc8
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
72 deletions
+0
-72
Site.java
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+0
-71
Spider.java
...agic-core/src/main/java/us/codecraft/webmagic/Spider.java
+0
-1
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
View file @
c51ac601
...
...
@@ -3,7 +3,6 @@ package us.codecraft.webmagic;
import
org.apache.http.HttpHost
;
import
org.apache.http.auth.UsernamePasswordCredentials
;
import
us.codecraft.webmagic.proxy.ProxyProvider
;
import
us.codecraft.webmagic.utils.UrlUtils
;
import
java.util.*
;
...
...
@@ -26,11 +25,6 @@ public class Site {
private
String
charset
;
/**
* startRequests are the requests the crawler starts with.
*/
private
List
<
Request
>
startRequests
=
new
ArrayList
<
Request
>();
private
int
sleepTime
=
5000
;
private
int
retryTimes
=
0
;
...
...
@@ -223,52 +217,6 @@ public class Site {
return
acceptStatCode
;
}
/**
 * Returns the start requests of this site as a list of urls.
 *
 * @return the result of {@link UrlUtils#convertToUrls} applied to the start requests
 * @see #getStartRequests
 * @deprecated use {@link #getStartRequests()} instead
 */
@Deprecated
public List<String> getStartUrls() {
    final List<String> urls = UrlUtils.convertToUrls(startRequests);
    return urls;
}
/**
 * Returns the start requests held by this site.
 * The internal list itself is handed out, not a copy.
 *
 * @return the start requests
 */
public List<Request> getStartRequests() {
    return this.startRequests;
}
/**
 * Add a url to the start urls.<br>
 * Because urls are more a Spider's property than a Site's, use {@link Spider#addUrl(String...)} instead.
 *
 * @param startUrl startUrl
 * @return this
 * @see Spider#addUrl(String...)
 * @deprecated use {@link Spider#addUrl(String...)} instead
 */
@Deprecated
public Site addStartUrl(String startUrl) {
    // Wrap the url in a Request and delegate to addStartRequest.
    return addStartRequest(new Request(startUrl));
}
/**
 * Add a request to the start requests.<br>
 * Because requests are more a Spider's property than a Site's, use {@link Spider#addRequest(Request...)} instead.
 *
 * @param startRequest startRequest
 * @return this
 * @see Spider#addRequest(Request...)
 * @deprecated use {@link Spider#addRequest(Request...)} instead
 */
@Deprecated
public Site addStartRequest(Request startRequest) {
    this.startRequests.add(startRequest);
    // While no domain is set, derive it from the url of the request being added.
    if (domain == null && startRequest.getUrl() != null) {
        domain = UrlUtils.getDomain(startRequest.getUrl());
    }
    return this;
}
/**
* Set the interval between the processing of two pages.<br>
* Time unit is milliseconds.<br>
...
...
@@ -348,21 +296,6 @@ public class Site {
return
this
;
}
/**
 * Returns the httpProxy of this site.
 *
 * @return httpProxy
 */
public HttpHost getHttpProxy() {
    return this.httpProxy;
}
/**
 * Sets the httpProxy for this site.
 *
 * @param httpProxy the proxy to store on this site
 * @return this, so calls can be chained
 */
public Site setHttpProxy(HttpHost httpProxy) {
    this.httpProxy = httpProxy;
    return this;
}
/**
 * Returns the useGzip flag of this site.
 *
 * @return useGzip
 */
public boolean isUseGzip() {
    return this.useGzip;
}
...
...
@@ -430,8 +363,6 @@ public class Site {
return
false
;
if
(
domain
!=
null
?
!
domain
.
equals
(
site
.
domain
)
:
site
.
domain
!=
null
)
return
false
;
if
(
headers
!=
null
?
!
headers
.
equals
(
site
.
headers
)
:
site
.
headers
!=
null
)
return
false
;
if
(
startRequests
!=
null
?
!
startRequests
.
equals
(
site
.
startRequests
)
:
site
.
startRequests
!=
null
)
return
false
;
if
(
userAgent
!=
null
?
!
userAgent
.
equals
(
site
.
userAgent
)
:
site
.
userAgent
!=
null
)
return
false
;
return
true
;
...
...
@@ -443,7 +374,6 @@ public class Site {
result
=
31
*
result
+
(
userAgent
!=
null
?
userAgent
.
hashCode
()
:
0
);
result
=
31
*
result
+
(
defaultCookies
!=
null
?
defaultCookies
.
hashCode
()
:
0
);
result
=
31
*
result
+
(
charset
!=
null
?
charset
.
hashCode
()
:
0
);
result
=
31
*
result
+
(
startRequests
!=
null
?
startRequests
.
hashCode
()
:
0
);
result
=
31
*
result
+
sleepTime
;
result
=
31
*
result
+
retryTimes
;
result
=
31
*
result
+
cycleRetryTimes
;
...
...
@@ -460,7 +390,6 @@ public class Site {
", userAgent='"
+
userAgent
+
'\''
+
", cookies="
+
defaultCookies
+
", charset='"
+
charset
+
'\''
+
", startRequests="
+
startRequests
+
", sleepTime="
+
sleepTime
+
", retryTimes="
+
retryTimes
+
", cycleRetryTimes="
+
cycleRetryTimes
+
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
View file @
c51ac601
...
...
@@ -126,7 +126,6 @@ public class Spider implements Runnable, Task {
public
Spider
(
PageProcessor
pageProcessor
)
{
this
.
pageProcessor
=
pageProcessor
;
this
.
site
=
pageProcessor
.
getSite
();
this
.
startRequests
=
pageProcessor
.
getSite
().
getStartRequests
();
}
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment