Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
3a79b1b6
Commit
3a79b1b6
authored
Apr 13, 2014
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Bugfix]formatter property does not work when field is String#100
parent
cc9d319f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
15 deletions
+44
-15
PageModelExtractor.java
.../java/us/codecraft/webmagic/model/PageModelExtractor.java
+15
-11
GithubRepo.java
.../java/us/codecraft/webmagic/model/samples/GithubRepo.java
+3
-4
StringTemplateFormatter.java
...t/webmagic/samples/formatter/StringTemplateFormatter.java
+26
-0
No files found.
webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
View file @
3a79b1b6
...
@@ -76,9 +76,21 @@ class PageModelExtractor {
...
@@ -76,9 +76,21 @@ class PageModelExtractor {
}
}
private
void
checkFormat
(
Field
field
,
FieldExtractor
fieldExtractor
)
{
private
void
checkFormat
(
Field
field
,
FieldExtractor
fieldExtractor
)
{
//check custom formatter
Formatter
formatter
=
field
.
getAnnotation
(
Formatter
.
class
);
if
(
formatter
!=
null
&&
!
formatter
.
formatter
().
equals
(
ObjectFormatter
.
class
))
{
if
(
formatter
!=
null
)
{
if
(!
formatter
.
formatter
().
equals
(
ObjectFormatter
.
class
))
{
ObjectFormatter
objectFormatter
=
initFormatter
(
formatter
.
formatter
());
objectFormatter
.
initParam
(
formatter
.
value
());
fieldExtractor
.
setObjectFormatter
(
objectFormatter
);
return
;
}
}
}
if
(!
fieldExtractor
.
isMulti
()
&&
!
String
.
class
.
isAssignableFrom
(
field
.
getType
()))
{
if
(!
fieldExtractor
.
isMulti
()
&&
!
String
.
class
.
isAssignableFrom
(
field
.
getType
()))
{
Class
<?>
fieldClazz
=
BasicTypeFormatter
.
detectBasicClass
(
field
.
getType
());
Class
<?>
fieldClazz
=
BasicTypeFormatter
.
detectBasicClass
(
field
.
getType
());
ObjectFormatter
objectFormatter
=
getObjectFormatter
(
field
,
fieldClazz
);
ObjectFormatter
objectFormatter
=
getObjectFormatter
(
field
,
fieldClazz
,
formatter
);
if
(
objectFormatter
==
null
)
{
if
(
objectFormatter
==
null
)
{
throw
new
IllegalStateException
(
"Can't find formatter for field "
+
field
.
getName
()
+
" of type "
+
fieldClazz
);
throw
new
IllegalStateException
(
"Can't find formatter for field "
+
field
.
getName
()
+
" of type "
+
fieldClazz
);
}
else
{
}
else
{
...
@@ -88,10 +100,9 @@ class PageModelExtractor {
...
@@ -88,10 +100,9 @@ class PageModelExtractor {
if
(!
List
.
class
.
isAssignableFrom
(
field
.
getType
()))
{
if
(!
List
.
class
.
isAssignableFrom
(
field
.
getType
()))
{
throw
new
IllegalStateException
(
"Field "
+
field
.
getName
()
+
" must be list"
);
throw
new
IllegalStateException
(
"Field "
+
field
.
getName
()
+
" must be list"
);
}
}
Formatter
formatter
=
field
.
getAnnotation
(
Formatter
.
class
);
if
(
formatter
!=
null
)
{
if
(
formatter
!=
null
)
{
if
(!
formatter
.
subClazz
().
equals
(
Void
.
class
))
{
if
(!
formatter
.
subClazz
().
equals
(
Void
.
class
))
{
ObjectFormatter
objectFormatter
=
getObjectFormatter
(
field
,
formatter
.
subClazz
());
ObjectFormatter
objectFormatter
=
getObjectFormatter
(
field
,
formatter
.
subClazz
()
,
formatter
);
if
(
objectFormatter
==
null
)
{
if
(
objectFormatter
==
null
)
{
throw
new
IllegalStateException
(
"Can't find formatter for field "
+
field
.
getName
()
+
" of type "
+
formatter
.
subClazz
());
throw
new
IllegalStateException
(
"Can't find formatter for field "
+
field
.
getName
()
+
" of type "
+
formatter
.
subClazz
());
}
else
{
}
else
{
...
@@ -102,14 +113,7 @@ class PageModelExtractor {
...
@@ -102,14 +113,7 @@ class PageModelExtractor {
}
}
}
}
private
ObjectFormatter
getObjectFormatter
(
Field
field
,
Class
<?>
fieldClazz
)
{
private
ObjectFormatter
getObjectFormatter
(
Field
field
,
Class
<?>
fieldClazz
,
Formatter
formatter
)
{
Formatter
formatter
=
field
.
getAnnotation
(
Formatter
.
class
);
if
(
formatter
!=
null
)
{
if
(!
formatter
.
formatter
().
equals
(
ObjectFormatter
.
class
))
{
ObjectFormatter
objectFormatter
=
initFormatter
(
formatter
.
formatter
());
objectFormatter
.
initParam
(
formatter
.
value
());
}
}
return
initFormatter
(
ObjectFormatters
.
get
(
fieldClazz
));
return
initFormatter
(
ObjectFormatters
.
get
(
fieldClazz
));
}
}
...
...
webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/GithubRepo.java
View file @
3a79b1b6
...
@@ -3,11 +3,9 @@ package us.codecraft.webmagic.model.samples;
...
@@ -3,11 +3,9 @@ package us.codecraft.webmagic.model.samples;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.model.HasKey
;
import
us.codecraft.webmagic.model.HasKey
;
import
us.codecraft.webmagic.model.OOSpider
;
import
us.codecraft.webmagic.model.OOSpider
;
import
us.codecraft.webmagic.model.annotation.ExtractBy
;
import
us.codecraft.webmagic.model.annotation.*
;
import
us.codecraft.webmagic.model.annotation.ExtractByUrl
;
import
us.codecraft.webmagic.model.annotation.HelpUrl
;
import
us.codecraft.webmagic.model.annotation.TargetUrl
;
import
us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline
;
import
us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline
;
import
us.codecraft.webmagic.samples.formatter.StringTemplateFormatter
;
import
us.codecraft.webmagic.scheduler.FileCacheQueueScheduler
;
import
us.codecraft.webmagic.scheduler.FileCacheQueueScheduler
;
import
java.util.List
;
import
java.util.List
;
...
@@ -22,6 +20,7 @@ public class GithubRepo implements HasKey {
...
@@ -22,6 +20,7 @@ public class GithubRepo implements HasKey {
@ExtractBy
(
value
=
"//h1[@class='entry-title public']/strong/a/text()"
,
notNull
=
true
)
@ExtractBy
(
value
=
"//h1[@class='entry-title public']/strong/a/text()"
,
notNull
=
true
)
private
String
name
;
private
String
name
;
@Formatter
(
value
=
"author%s"
,
formatter
=
StringTemplateFormatter
.
class
)
@ExtractByUrl
(
"https://github\\.com/(\\w+)/.*"
)
@ExtractByUrl
(
"https://github\\.com/(\\w+)/.*"
)
private
String
author
;
private
String
author
;
...
...
webmagic-samples/src/main/java/us/codecraft/webmagic/samples/formatter/StringTemplateFormatter.java
0 → 100644
View file @
3a79b1b6
package
us
.
codecraft
.
webmagic
.
samples
.
formatter
;
import
us.codecraft.webmagic.model.formatter.ObjectFormatter
;
/**
 * Sample {@link ObjectFormatter} that renders the extracted text through a
 * {@link String#format(String, Object...)} template such as {@code "author%s"}.
 *
 * <p>The template is the first element handed to {@link #initParam(String[])}.
 * When no template has been supplied, {@code "%s"} is used, so a non-null raw
 * value is returned as-is instead of the formatter failing.
 *
 * @author yihua.huang@dianping.com
 */
public class StringTemplateFormatter implements ObjectFormatter<String> {

    /** Fallback template used when no template is configured. */
    private static final String DEFAULT_TEMPLATE = "%s";

    /** Never null: starts as the fallback so format() is safe before initParam(). */
    private String template = DEFAULT_TEMPLATE;

    /**
     * Applies the configured template to the raw extracted text.
     *
     * @param raw the extracted text, passed as the single format argument
     * @return the result of {@code String.format(template, raw)}
     * @throws Exception declared by the interface; {@code String.format} itself may
     *         throw {@link java.util.IllegalFormatException} for a malformed template
     */
    @Override
    public String format(String raw) throws Exception {
        return String.format(template, raw);
    }

    /**
     * @return {@code String.class}, the type this formatter produces
     */
    @Override
    public Class<String> clazz() {
        return String.class;
    }

    /**
     * Sets the template from the first element of {@code extra}.
     *
     * <p>A null array, an empty array, or a null first element resets the
     * template to {@code "%s"} rather than throwing, so a formatter declared
     * without parameters still works.
     *
     * @param extra formatter parameters; only the first element is read
     */
    @Override
    public void initParam(String[] extra) {
        if (extra == null || extra.length == 0 || extra[0] == null) {
            template = DEFAULT_TEMPLATE;
            return;
        }
        template = extra[0];
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment