Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
b1ef61b2
Commit
b1ef61b2
authored
Jun 03, 2017
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add tests before refactor #586
parent
b363ee6a
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
189 additions
and
101 deletions
+189
-101
PageModelExtractor.java
.../java/us/codecraft/webmagic/model/PageModelExtractor.java
+12
-21
GithubRepoApi.java
.../test/java/us/codecraft/webmagic/model/GithubRepoApi.java
+18
-0
GithubRepoTest.java
...test/java/us/codecraft/webmagic/model/GithubRepoTest.java
+1
-9
MockModel.java
.../src/test/java/us/codecraft/webmagic/model/MockModel.java
+0
-13
ModelPageProcessorTest.java
...a/us/codecraft/webmagic/model/ModelPageProcessorTest.java
+7
-31
PageMapperTest.java
...test/java/us/codecraft/webmagic/model/PageMapperTest.java
+3
-26
PageMocker.java
...src/test/java/us/codecraft/webmagic/model/PageMocker.java
+32
-0
PageModelExtractorTest.java
...a/us/codecraft/webmagic/model/PageModelExtractorTest.java
+103
-0
mock-webmagic.html
...agic-extension/src/test/resources/html/mock-webmagic.html
+13
-1
No files found.
webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
View file @
b1ef61b2
...
...
@@ -83,14 +83,13 @@ class PageModelExtractor {
return
;
}
if
(!
formatter
.
formatter
().
equals
(
Formatter
.
DEFAULT_FORMATTER
))
{
ObjectFormatter
objectFormatter
=
initFormatter
(
formatter
.
formatter
());
objectFormatter
.
initParam
(
formatter
.
value
());
ObjectFormatter
objectFormatter
=
initFormatter
(
formatter
.
formatter
(),
formatter
.
value
());
fieldExtractor
.
setObjectFormatter
(
objectFormatter
);
return
;
}
if
(!
fieldExtractor
.
isMulti
()
&&
!
String
.
class
.
isAssignableFrom
(
field
.
getType
()))
{
Class
<?>
fieldClazz
=
BasicTypeFormatter
.
detectBasicClass
(
field
.
getType
());
ObjectFormatter
objectFormatter
=
getObjectFormatter
(
field
,
fieldClazz
,
formatter
);
ObjectFormatter
objectFormatter
=
initFormatter
(
ObjectFormatters
.
get
(
fieldClazz
),
formatter
.
value
()
);
if
(
objectFormatter
==
null
)
{
throw
new
IllegalStateException
(
"Can't find formatter for field "
+
field
.
getName
()
+
" of type "
+
fieldClazz
);
}
else
{
...
...
@@ -100,9 +99,8 @@ class PageModelExtractor {
if
(!
List
.
class
.
isAssignableFrom
(
field
.
getType
()))
{
throw
new
IllegalStateException
(
"Field "
+
field
.
getName
()
+
" must be list"
);
}
if
(
formatter
!=
null
)
{
if
(!
formatter
.
subClazz
().
equals
(
Void
.
class
))
{
ObjectFormatter
objectFormatter
=
getObjectFormatter
(
field
,
formatter
.
subClazz
(),
formatter
);
ObjectFormatter
objectFormatter
=
initFormatter
(
ObjectFormatters
.
get
(
formatter
.
subClazz
()),
formatter
.
value
()
);
if
(
objectFormatter
==
null
)
{
throw
new
IllegalStateException
(
"Can't find formatter for field "
+
field
.
getName
()
+
" of type "
+
formatter
.
subClazz
());
}
else
{
...
...
@@ -111,19 +109,12 @@ class PageModelExtractor {
}
}
}
}
private
ObjectFormatter
getObjectFormatter
(
Field
field
,
Class
<?>
fieldClazz
,
Formatter
formatter
)
{
ObjectFormatter
objectFormatter
=
initFormatter
(
ObjectFormatters
.
get
(
fieldClazz
));
if
(
formatter
!=
null
&&
formatter
.
value
()
!=
null
){
objectFormatter
.
initParam
(
formatter
.
value
());
}
return
objectFormatter
;
}
private
ObjectFormatter
initFormatter
(
Class
<?
extends
ObjectFormatter
>
formatterClazz
)
{
private
ObjectFormatter
initFormatter
(
Class
<?
extends
ObjectFormatter
>
formatterClazz
,
String
[]
params
)
{
try
{
return
formatterClazz
.
newInstance
();
ObjectFormatter
objectFormatter
=
formatterClazz
.
newInstance
();
objectFormatter
.
initParam
(
params
);
return
objectFormatter
;
}
catch
(
InstantiationException
e
)
{
throw
new
RuntimeException
(
e
);
}
catch
(
IllegalAccessException
e
)
{
...
...
webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoApi.java
0 → 100644
View file @
b1ef61b2
package
us
.
codecraft
.
webmagic
.
model
;
import
us.codecraft.webmagic.model.annotation.ExtractBy
;
/**
 * Page model with a single {@code name} field that is filled from the raw page text by
 * evaluating the JsonPath expression {@code $.name}.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/6/3
 * Time: 9:07 PM
 */
public class GithubRepoApi {

    /** Value selected by the JsonPath {@code $.name}, read from the raw text source. */
    @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$.name", source = ExtractBy.Source.RawText)
    private String name;

    /**
     * Returns the extracted name.
     *
     * @return the current value of the {@code name} field; {@code null} if it was never assigned
     */
    public String getName() {
        return this.name;
    }
}
webmagic-extension/src/test/java/us/codecraft/webmagic/model/GithubRepoTest.java
View file @
b1ef61b2
package
us
.
codecraft
.
webmagic
.
model
;
import
org.junit.Test
;
import
us.codecraft.webmagic.SimpleHttpClient
;
import
us.codecraft.webmagic.Site
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.downloader.MockGithubDownloader
;
import
us.codecraft.webmagic.pipeline.PageModelPipeline
;
import
us.codecraft.webmagic.example.GithubRepo
;
import
us.codecraft.webmagic.pipeline.PageModelPipeline
;
import
static
org
.
assertj
.
core
.
api
.
Assertions
.
assertThat
;
...
...
@@ -27,11 +26,4 @@ public class GithubRepoTest {
},
GithubRepo
.
class
).
addUrl
(
"https://github.com/code4craft/webmagic"
).
setDownloader
(
new
MockGithubDownloader
()).
test
(
"https://github.com/code4craft/webmagic"
);
}
@Test
public
void
test1
()
throws
Exception
{
SimpleHttpClient
simpleHttpClient
=
new
SimpleHttpClient
();
GithubRepo
model
=
simpleHttpClient
.
get
(
"https://github.com/code4craft/webmagic"
,
GithubRepo
.
class
);
System
.
out
.
println
(
model
);
}
}
webmagic-extension/src/test/java/us/codecraft/webmagic/model/MockModel.java
deleted
100644 → 0
View file @
b363ee6a
package
us
.
codecraft
.
webmagic
.
model
;
import
us.codecraft.webmagic.model.annotation.HelpUrl
;
import
us.codecraft.webmagic.model.annotation.TargetUrl
;
/**
 * Empty model class that exists only to carry URL annotations.
 *
 * <p>{@code @TargetUrl} declares the pattern {@code http://webmagic.io/post/\d+} with the
 * XPath source region {@code //li[@class='post']}; {@code @HelpUrl} declares the pattern
 * {@code http://webmagic.io/list/\d+} with the XPath source region
 * {@code //li[@class='list']}. The class itself has no fields or methods.
 *
 * @author code4crafter@gmail.com
 */
@TargetUrl
(
value
=
"http://webmagic.io/post/\\d+"
,
sourceRegion
=
"//li[@class='post']"
)
@HelpUrl
(
value
=
"http://webmagic.io/list/\\d+"
,
sourceRegion
=
"//li[@class='list']"
)
public
class
MockModel
{
}
webmagic-extension/src/test/java/us/codecraft/webmagic/model/ModelPageProcessorTest.java
View file @
b1ef61b2
package
us
.
codecraft
.
webmagic
.
model
;
import
org.apache.commons.io.IOUtils
;
import
org.apache.commons.lang3.time.DateFormatUtils
;
import
org.junit.Test
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.model.annotation.ExtractBy
;
import
us.codecraft.webmagic.model.annotation.
Formatter
;
import
us.codecraft.webmagic.model.annotation.
HelpUrl
;
import
us.codecraft.webmagic.model.annotation.TargetUrl
;
import
us.codecraft.webmagic.model.formatter.DateFormatter
;
import
us.codecraft.webmagic.selector.PlainText
;
import
java.io.IOException
;
import
java.util.Date
;
import
static
org
.
assertj
.
core
.
api
.
Assertions
.
assertThat
;
/**
...
...
@@ -22,6 +16,8 @@ import static org.assertj.core.api.Assertions.assertThat;
*/
public
class
ModelPageProcessorTest
{
private
PageMocker
pageMocker
=
new
PageMocker
();
@TargetUrl
(
"http://codecraft.us/foo"
)
public
static
class
ModelFoo
{
...
...
@@ -38,15 +34,10 @@ public class ModelPageProcessorTest {
}
public
static
class
ModelDate
{
@Formatter
(
value
=
"yyyyMMdd"
,
formatter
=
DateFormatter
.
class
)
@ExtractBy
(
value
=
"//div[@class='date']/text()"
,
notNull
=
true
)
private
Date
date
;
@TargetUrl
(
value
=
"http://webmagic.io/post/\\d+"
,
sourceRegion
=
"//li[@class='post']"
)
@HelpUrl
(
value
=
"http://webmagic.io/list/\\d+"
,
sourceRegion
=
"//li[@class='list']"
)
public
static
class
MockModel
{
public
Date
getDate
()
{
return
date
;
}
}
@Test
...
...
@@ -63,26 +54,11 @@ public class ModelPageProcessorTest {
@Test
public
void
testExtractLinks
()
throws
Exception
{
ModelPageProcessor
modelPageProcessor
=
ModelPageProcessor
.
create
(
null
,
MockModel
.
class
);
Page
page
=
getMockPage
();
Page
page
=
pageMocker
.
getMockPage
();
modelPageProcessor
.
process
(
page
);
assertThat
(
page
.
getTargetRequests
()).
containsExactly
(
new
Request
(
"http://webmagic.io/list/1"
),
new
Request
(
"http://webmagic.io/list/2"
),
new
Request
(
"http://webmagic.io/post/1"
),
new
Request
(
"http://webmagic.io/post/2"
));
}
@Test
public
void
testExtractDate
()
throws
Exception
{
ModelPageProcessor
modelPageProcessor
=
ModelPageProcessor
.
create
(
null
,
ModelDate
.
class
);
Page
page
=
getMockPage
();
modelPageProcessor
.
process
(
page
);
ModelDate
modelDate
=
(
ModelDate
)
page
.
getResultItems
().
get
(
ModelDate
.
class
.
getCanonicalName
());
assertThat
(
DateFormatUtils
.
format
(
modelDate
.
getDate
(),
"yyyyMMdd"
)).
isEqualTo
(
"20170603"
);
}
private
Page
getMockPage
()
throws
IOException
{
Page
page
=
new
Page
();
page
.
setRawText
(
IOUtils
.
toString
(
getClass
().
getClassLoader
().
getResourceAsStream
(
"html/mock-webmagic.html"
)));
page
.
setRequest
(
new
Request
(
"http://webmagic.io/list/0"
));
page
.
setUrl
(
new
PlainText
(
"http://webmagic.io/list/0"
));
return
page
;
}
}
webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMapperTest.java
View file @
b1ef61b2
package
us
.
codecraft
.
webmagic
.
model
;
import
org.apache.commons.io.IOUtils
;
import
org.junit.Test
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.model.annotation.ExtractBy
;
import
us.codecraft.webmagic.selector.PlainText
;
import
java.io.IOException
;
import
static
org
.
assertj
.
core
.
api
.
Assertions
.
assertThat
;
...
...
@@ -18,29 +11,13 @@ import static org.assertj.core.api.Assertions.assertThat;
*/
public
class
PageMapperTest
{
public
static
class
GithubRepo
{
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$.name"
,
source
=
ExtractBy
.
Source
.
RawText
)
private
String
name
;
public
String
getName
()
{
return
name
;
}
}
private
PageMocker
pageMocker
=
new
PageMocker
();
@Test
public
void
test_get
()
throws
Exception
{
PageMapper
<
GithubRepo
>
pageMapper
=
new
PageMapper
<
GithubRepo
>(
GithubRepo
.
class
);
GithubRepo
githubRepo
=
pageMapper
.
get
(
getMockJsonPage
());
PageMapper
<
GithubRepo
Api
>
pageMapper
=
new
PageMapper
<
GithubRepoApi
>(
GithubRepoApi
.
class
);
GithubRepo
Api
githubRepo
=
pageMapper
.
get
(
pageMocker
.
getMockJsonPage
());
assertThat
(
githubRepo
.
getName
()).
isEqualTo
(
"webmagic"
);
}
private
Page
getMockJsonPage
()
throws
IOException
{
Page
page
=
new
Page
();
page
.
setRawText
(
IOUtils
.
toString
(
getClass
().
getClassLoader
().
getResourceAsStream
(
"json/mock-githubrepo.json"
)));
page
.
setRequest
(
new
Request
(
"https://api.github.com/repos/code4craft/webmagic"
));
page
.
setUrl
(
new
PlainText
(
"https://api.github.com/repos/code4craft/webmagic"
));
return
page
;
}
}
webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java
0 → 100644
View file @
b1ef61b2
package
us
.
codecraft
.
webmagic
.
model
;
import
org.apache.commons.io.IOUtils
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.selector.PlainText
;
import
java.io.IOException
;
/**
 * Test helper that builds {@link Page} fixtures from resources on the classpath.
 *
 * <p>Each fixture has its raw text loaded from a resource file and its request and URL set to
 * the same fixed address.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/6/3
 * Time: 9:08 PM
 */
public class PageMocker {

    /**
     * Builds a page whose raw text is the content of {@code json/mock-githubrepo.json}.
     *
     * @return a page whose request and URL are
     *         {@code https://api.github.com/repos/code4craft/webmagic}
     * @throws IOException if the resource is missing or cannot be read
     */
    public Page getMockJsonPage() throws IOException {
        return mockPage("json/mock-githubrepo.json", "https://api.github.com/repos/code4craft/webmagic");
    }

    /**
     * Builds a page whose raw text is the content of {@code html/mock-webmagic.html}.
     *
     * @return a page whose request and URL are {@code http://webmagic.io/list/0}
     * @throws IOException if the resource is missing or cannot be read
     */
    public Page getMockPage() throws IOException {
        return mockPage("html/mock-webmagic.html", "http://webmagic.io/list/0");
    }

    /**
     * Loads a classpath resource as UTF-8 text and wraps it in a page addressed by {@code url}.
     *
     * @param resource classpath-relative resource name
     * @param url      address used for both the page's request and its URL
     * @return the populated page
     * @throws IOException if the resource is missing or cannot be read
     */
    private static Page mockPage(String resource, String url) throws IOException {
        java.io.InputStream in = PageMocker.class.getClassLoader().getResourceAsStream(resource);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with a message that names
            // the resource instead of a NullPointerException from inside IOUtils.
            throw new java.io.FileNotFoundException("Test resource not found on classpath: " + resource);
        }
        Page page = new Page();
        try {
            // Explicit charset keeps the fixture independent of the platform default encoding.
            page.setRawText(IOUtils.toString(in, "UTF-8"));
        } finally {
            in.close();
        }
        page.setRequest(new Request(url));
        page.setUrl(new PlainText(url));
        return page;
    }
}
webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageModelExtractorTest.java
0 → 100644
View file @
b1ef61b2
package
us
.
codecraft
.
webmagic
.
model
;
import
org.apache.commons.lang3.time.DateFormatUtils
;
import
org.junit.Test
;
import
us.codecraft.webmagic.model.annotation.ExtractBy
;
import
us.codecraft.webmagic.model.annotation.Formatter
;
import
us.codecraft.webmagic.model.formatter.DateFormatter
;
import
java.util.Date
;
import
java.util.List
;
import
static
org
.
assertj
.
core
.
api
.
Assertions
.
assertThat
;
/**
 * Tests for {@link PageModelExtractor}: each test creates an extractor for one of the nested
 * model classes, runs it against the page returned by {@code PageMocker.getMockPage()} and
 * checks the populated field.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/6/3
 * Time: 9:06 PM
 */
public class PageModelExtractorTest {

    private PageMocker pageMocker = new PageMocker();

    /** Model with a plain string field selected by XPath. */
    public static class ModelDateStr {

        @ExtractBy(value = "//div[@class='date']/text()", notNull = true)
        private String dateStr;
    }

    /** Model with a date field that names its formatter class and the pattern {@code yyyyMMdd}. */
    public static class ModelDate {

        @Formatter(value = "yyyyMMdd", formatter = DateFormatter.class)
        @ExtractBy(value = "//div[@class='date']/text()", notNull = true)
        private Date date;
    }

    /** Model with a primitive {@code int} field and no explicit formatter annotation. */
    public static class ModelInt {

        @ExtractBy(value = "//div[@class='number']/text()", notNull = true)
        private int number;
    }

    /** Model with a list-of-strings field selected by XPath. */
    public static class ModelStringList {

        @ExtractBy("//a/@href")
        private List<String> links;
    }

    /** Model with a list field whose element type is declared through {@code subClazz}. */
    public static class ModelIntList {

        @Formatter(subClazz = Integer.class)
        @ExtractBy("//li[@class='numbers']/text()")
        private List<Integer> numbers;
    }

    /** Model with a list field declaring both an element type and the pattern {@code yyyyMMdd}. */
    public static class ModelDateList {

        @Formatter(subClazz = Date.class, value = "yyyyMMdd")
        @ExtractBy("//li[@class='dates']/text()")
        private List<Date> dates;
    }

    @Test
    public void testXpath() throws Exception {
        ModelDateStr extracted = extract(ModelDateStr.class);
        assertThat(extracted.dateStr).isEqualTo("20170603");
    }

    @Test
    public void testExtractDate() throws Exception {
        ModelDate extracted = extract(ModelDate.class);
        String formatted = DateFormatUtils.format(extracted.date, "yyyyMMdd");
        assertThat(formatted).isEqualTo("20170603");
    }

    @Test
    public void testExtractInt() throws Exception {
        ModelInt extracted = extract(ModelInt.class);
        assertThat(extracted.number).isEqualTo(12);
    }

    @Test
    public void testExtractList() throws Exception {
        ModelStringList extracted = extract(ModelStringList.class);
        assertThat(extracted.links).hasSize(8);
    }

    @Test
    public void testExtractIntList() throws Exception {
        ModelIntList extracted = extract(ModelIntList.class);
        assertThat(extracted.numbers).hasSize(4);
    }

    @Test
    public void testExtractDateList() throws Exception {
        ModelDateList extracted = extract(ModelDateList.class);
        assertThat(extracted.dates).hasSize(4);
    }

    /**
     * Creates an extractor for {@code modelClass}, processes the mock page with it and casts
     * the result to the model type.
     *
     * @param modelClass annotated model class to extract
     * @return the object returned by the extractor, cast to {@code modelClass}
     * @throws Exception if the mock page cannot be built or extraction fails
     */
    private <T> T extract(Class<T> modelClass) throws Exception {
        return modelClass.cast(PageModelExtractor.create(modelClass).process(pageMocker.getMockPage()));
    }
}
webmagic-extension/src/test/resources/html/mock-webmagic.html
View file @
b1ef61b2
...
...
@@ -6,6 +6,7 @@
</head>
<body>
<div
class=
"date"
>
20170603
</div>
<div
class=
"number"
>
12
</div>
<ul>
<li
class=
"list"
><a
href=
"http://webmagic.io/list/1"
></a></li>
<li
class=
"list"
><a
href=
"http://webmagic.io/list/2"
></a></li>
...
...
@@ -18,6 +19,17 @@
<li
class=
"post"
><a
href=
"http://webmagic.io/list/3"
></a></li>
<li
class=
"post"
><a
href=
"http://webmagic.io/list/4"
></a></li>
</ul>
<ul>
<li
class=
"numbers"
>
1
</li>
<li
class=
"numbers"
>
2
</li>
<li
class=
"numbers"
>
3
</li>
<li
class=
"numbers"
>
4
</li>
</ul>
<ul>
<li
class=
"dates"
>
20170601
</li>
<li
class=
"dates"
>
20170602
</li>
<li
class=
"dates"
>
20170603
</li>
<li
class=
"dates"
>
20170604
</li>
</ul>
</body>
</html>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment