Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
486d9d27
Commit
486d9d27
authored
Nov 28, 2013
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#45 Remove multi in ExtractBy
parent
aaa53f58
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
17 additions
and
5 deletions
+17
-5
AppStore.java
...src/main/java/us/codecraft/webmagic/example/AppStore.java
+5
-1
PageModelExtractor.java
.../java/us/codecraft/webmagic/model/PageModelExtractor.java
+6
-4
ComboExtract.java
.../us/codecraft/webmagic/model/annotation/ComboExtract.java
+2
-0
ExtractBy.java
...ava/us/codecraft/webmagic/model/annotation/ExtractBy.java
+2
-0
ExtractByUrl.java
.../us/codecraft/webmagic/model/annotation/ExtractByUrl.java
+2
-0
No files found.
webmagic-extension/src/main/java/us/codecraft/webmagic/example/AppStore.java
View file @
486d9d27
...
...
@@ -23,14 +23,18 @@ public class AppStore {
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$..userRatingCount"
)
private
int
userRatingCount
;
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$..screenshotUrls"
,
multi
=
true
)
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$..screenshotUrls"
)
private
List
<
String
>
screenshotUrls
;
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$..supportedDevices"
)
private
List
<
String
>
supportedDevices
;
public
static
void
main
(
String
[]
args
)
{
AppStore
appStore
=
OOSpider
.
create
(
Site
.
me
(),
AppStore
.
class
).<
AppStore
>
get
(
"http://itunes.apple.com/lookup?id=653350791&country=cn&entity=software"
);
System
.
out
.
println
(
appStore
.
trackName
);
System
.
out
.
println
(
appStore
.
description
);
System
.
out
.
println
(
appStore
.
userRatingCount
);
System
.
out
.
println
(
appStore
.
screenshotUrls
);
System
.
out
.
println
(
appStore
.
supportedDevices
);
}
}
webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
View file @
486d9d27
...
...
@@ -131,7 +131,9 @@ class PageModelExtractor {
if
(
regexPattern
.
trim
().
equals
(
""
))
{
regexPattern
=
".*"
;
}
fieldExtractor
=
new
FieldExtractor
(
field
,
new
RegexSelector
(
regexPattern
),
FieldExtractor
.
Source
.
Url
,
extractByUrl
.
notNull
(),
extractByUrl
.
multi
());
fieldExtractor
=
new
FieldExtractor
(
field
,
new
RegexSelector
(
regexPattern
),
FieldExtractor
.
Source
.
Url
,
extractByUrl
.
notNull
(),
extractByUrl
.
multi
()
||
List
.
class
.
isAssignableFrom
(
field
.
getType
()));
Method
setterMethod
=
getSetterMethod
(
clazz
,
field
);
if
(
setterMethod
!=
null
)
{
fieldExtractor
.
setSetterMethod
(
setterMethod
);
...
...
@@ -157,7 +159,7 @@ class PageModelExtractor {
selector
=
new
AndSelector
(
ExtractorUtils
.
getSelectors
(
extractBies
));
}
fieldExtractor
=
new
FieldExtractor
(
field
,
selector
,
comboExtract
.
source
()
==
ComboExtract
.
Source
.
RawHtml
?
FieldExtractor
.
Source
.
RawHtml
:
FieldExtractor
.
Source
.
Html
,
comboExtract
.
notNull
(),
comboExtract
.
multi
());
comboExtract
.
notNull
(),
comboExtract
.
multi
()
||
List
.
class
.
isAssignableFrom
(
field
.
getType
())
);
Method
setterMethod
=
getSetterMethod
(
clazz
,
field
);
if
(
setterMethod
!=
null
)
{
fieldExtractor
.
setSetterMethod
(
setterMethod
);
...
...
@@ -172,7 +174,7 @@ class PageModelExtractor {
if
(
extractBy
!=
null
)
{
Selector
selector
=
ExtractorUtils
.
getSelector
(
extractBy
);
fieldExtractor
=
new
FieldExtractor
(
field
,
selector
,
extractBy
.
source
()
==
ExtractBy
.
Source
.
RawHtml
?
FieldExtractor
.
Source
.
RawHtml
:
FieldExtractor
.
Source
.
Html
,
extractBy
.
notNull
(),
extractBy
.
multi
());
extractBy
.
notNull
(),
extractBy
.
multi
()
||
List
.
class
.
isAssignableFrom
(
field
.
getType
())
);
Method
setterMethod
=
getSetterMethod
(
clazz
,
field
);
if
(
setterMethod
!=
null
)
{
fieldExtractor
.
setSetterMethod
(
setterMethod
);
...
...
@@ -359,7 +361,7 @@ class PageModelExtractor {
}
private
void
setField
(
Object
o
,
FieldExtractor
fieldExtractor
,
Object
value
)
throws
IllegalAccessException
,
InvocationTargetException
{
if
(
value
==
null
)
{
if
(
value
==
null
)
{
return
;
}
if
(
fieldExtractor
.
getSetterMethod
()
!=
null
)
{
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
View file @
486d9d27
...
...
@@ -75,6 +75,8 @@ public @interface ComboExtract {
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List). <br>
*
* Deprecated since 0.4.2. This option is determined automatically by the class of field.
* @deprecated since 0.4.2
* @return whether the extractor return more than one result
*/
boolean
multi
()
default
false
;
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
View file @
486d9d27
...
...
@@ -67,6 +67,8 @@ public @interface ExtractBy {
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List). <br>
*
* Deprecated since 0.4.2. This option is determined automatically by the class of field.
* @deprecated since 0.4.2
* @return whether the extractor return more than one result
*/
boolean
multi
()
default
false
;
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
View file @
486d9d27
...
...
@@ -33,6 +33,8 @@ public @interface ExtractByUrl {
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List). <br>
*
* Deprecated since 0.4.2. This option is determined automatically by the class of field.
* @deprecated since 0.4.2
* @return whether the extractor return more than one result
*/
boolean
multi
()
default
false
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment