Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
05a1f395
Commit
05a1f395
authored
Feb 18, 2015
by
Yihua Huang
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #193 from EdwardsBean/fix-mppipeline
Bug fix:MultiPagePipeline and DoubleKeyMap concurrent bug
parents
6b9d21fc
74962d69
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
47 additions
and
36 deletions
+47
-36
MultiPagePipeline.java
...ava/us/codecraft/webmagic/pipeline/MultiPagePipeline.java
+44
-34
DoubleKeyMap.java
...c/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
+3
-2
No files found.
webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java
View file @
05a1f395
...
@@ -36,51 +36,61 @@ public class MultiPagePipeline implements Pipeline {
...
@@ -36,51 +36,61 @@ public class MultiPagePipeline implements Pipeline {
private
void
handleObject
(
Iterator
<
Map
.
Entry
<
String
,
Object
>>
iterator
)
{
private
void
handleObject
(
Iterator
<
Map
.
Entry
<
String
,
Object
>>
iterator
)
{
Map
.
Entry
<
String
,
Object
>
objectEntry
=
iterator
.
next
();
Map
.
Entry
<
String
,
Object
>
objectEntry
=
iterator
.
next
();
Object
o
=
objectEntry
.
getValue
();
Object
o
=
objectEntry
.
getValue
();
//需要拼凑
if
(
o
instanceof
MultiPageModel
)
{
if
(
o
instanceof
MultiPageModel
)
{
MultiPageModel
multiPageModel
=
(
MultiPageModel
)
o
;
MultiPageModel
multiPageModel
=
(
MultiPageModel
)
o
;
pageMap
.
put
(
multiPageModel
.
getPageKey
(),
multiPageModel
.
getPage
(),
Boolean
.
TRUE
);
//这次处理的部分,设置为完成
if
(
multiPageModel
.
getOtherPages
()
!=
null
)
{
pageMap
.
put
(
multiPageModel
.
getPageKey
(),
multiPageModel
.
getPage
(),
Boolean
.
FALSE
);
for
(
String
otherPage
:
multiPageModel
.
getOtherPages
())
{
//每个key单独加锁
Boolean
aBoolean
=
pageMap
.
get
(
multiPageModel
.
getPageKey
(),
otherPage
);
synchronized
(
pageMap
.
get
(
multiPageModel
.
getPageKey
()))
{
if
(
aBoolean
==
null
)
{
pageMap
.
put
(
multiPageModel
.
getPageKey
(),
multiPageModel
.
getPage
(),
Boolean
.
TRUE
);
pageMap
.
put
(
multiPageModel
.
getPageKey
(),
otherPage
,
Boolean
.
FALSE
);
//其他需要拼凑的部分
if
(
multiPageModel
.
getOtherPages
()
!=
null
)
{
for
(
String
otherPage
:
multiPageModel
.
getOtherPages
())
{
Boolean
aBoolean
=
pageMap
.
get
(
multiPageModel
.
getPageKey
(),
otherPage
);
if
(
aBoolean
==
null
)
{
pageMap
.
put
(
multiPageModel
.
getPageKey
(),
otherPage
,
Boolean
.
FALSE
);
}
}
}
}
}
}
//check if all pages are processed
//check if all pages are processed
Map
<
String
,
Boolean
>
booleanMap
=
pageMap
.
get
(
multiPageModel
.
getPageKey
());
Map
<
String
,
Boolean
>
booleanMap
=
pageMap
.
get
(
multiPageModel
.
getPageKey
());
objectMap
.
put
(
multiPageModel
.
getPageKey
(),
multiPageModel
.
getPage
(),
multiPageModel
);
objectMap
.
put
(
multiPageModel
.
getPageKey
(),
multiPageModel
.
getPage
(),
multiPageModel
);
if
(
booleanMap
==
null
)
{
if
(
booleanMap
==
null
)
{
return
;
}
for
(
Map
.
Entry
<
String
,
Boolean
>
stringBooleanEntry
:
booleanMap
.
entrySet
())
{
if
(!
stringBooleanEntry
.
getValue
())
{
iterator
.
remove
();
return
;
return
;
}
}
}
// /过滤,这次完成的page item中,还未拼凑完整的item,不进入下一个pipeline
List
<
Map
.
Entry
<
String
,
MultiPageModel
>>
entryList
=
new
ArrayList
<
Map
.
Entry
<
String
,
MultiPageModel
>>();
for
(
Map
.
Entry
<
String
,
Boolean
>
stringBooleanEntry
:
booleanMap
.
entrySet
())
{
entryList
.
addAll
(
objectMap
.
get
(
multiPageModel
.
getPageKey
()).
entrySet
());
if
(!
stringBooleanEntry
.
getValue
())
{
if
(
entryList
.
size
()
!=
0
)
{
iterator
.
remove
();
Collections
.
sort
(
entryList
,
new
Comparator
<
Map
.
Entry
<
String
,
MultiPageModel
>>()
{
return
;
@Override
}
public
int
compare
(
Map
.
Entry
<
String
,
MultiPageModel
>
o1
,
Map
.
Entry
<
String
,
MultiPageModel
>
o2
)
{
}
try
{
List
<
Map
.
Entry
<
String
,
MultiPageModel
>>
entryList
=
new
ArrayList
<
Map
.
Entry
<
String
,
MultiPageModel
>>();
int
i1
=
Integer
.
parseInt
(
o1
.
getKey
());
entryList
.
addAll
(
objectMap
.
get
(
multiPageModel
.
getPageKey
()).
entrySet
());
int
i2
=
Integer
.
parseInt
(
o2
.
getKey
());
if
(
entryList
.
size
()
!=
0
)
{
return
i1
-
i2
;
Collections
.
sort
(
entryList
,
new
Comparator
<
Map
.
Entry
<
String
,
MultiPageModel
>>()
{
}
catch
(
NumberFormatException
e
)
{
@Override
return
o1
.
getKey
().
compareTo
(
o2
.
getKey
());
public
int
compare
(
Map
.
Entry
<
String
,
MultiPageModel
>
o1
,
Map
.
Entry
<
String
,
MultiPageModel
>
o2
)
{
try
{
int
i1
=
Integer
.
parseInt
(
o1
.
getKey
());
int
i2
=
Integer
.
parseInt
(
o2
.
getKey
());
return
i1
-
i2
;
}
catch
(
NumberFormatException
e
)
{
return
o1
.
getKey
().
compareTo
(
o2
.
getKey
());
}
}
}
});
// 合并
MultiPageModel
value
=
entryList
.
get
(
0
).
getValue
();
for
(
int
i
=
1
;
i
<
entryList
.
size
();
i
++)
{
value
=
value
.
combine
(
entryList
.
get
(
i
).
getValue
());
}
}
});
objectEntry
.
setValue
(
value
);
MultiPageModel
value
=
entryList
.
get
(
0
).
getValue
();
for
(
int
i
=
1
;
i
<
entryList
.
size
();
i
++)
{
value
=
value
.
combine
(
entryList
.
get
(
i
).
getValue
());
}
}
objectEntry
.
setValue
(
value
);
}
}
}
}
}
}
}
}
webmagic-extension/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
View file @
05a1f395
...
@@ -75,8 +75,9 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
...
@@ -75,8 +75,9 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
* @param value
* @param value
* @return value
* @return value
*/
*/
public
V
put
(
K1
key1
,
K2
key2
,
V
value
)
{
public
synchronized
V
put
(
K1
key1
,
K2
key2
,
V
value
)
{
if
(
map
.
get
(
key1
)
==
null
)
{
if
(
map
.
get
(
key1
)
==
null
)
{
//不加锁的话,多个线程有可能都会执行到这里
map
.
put
(
key1
,
this
.<
K2
,
V
>
newMap
());
map
.
put
(
key1
,
this
.<
K2
,
V
>
newMap
());
}
}
return
get
(
key1
).
put
(
key2
,
value
);
return
get
(
key1
).
put
(
key2
,
value
);
...
@@ -87,7 +88,7 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
...
@@ -87,7 +88,7 @@ public class DoubleKeyMap<K1, K2, V> extends MultiKeyMapBase {
* @param key2
* @param key2
* @return value
* @return value
*/
*/
public
V
remove
(
K1
key1
,
K2
key2
)
{
public
synchronized
V
remove
(
K1
key1
,
K2
key2
)
{
if
(
get
(
key1
)
==
null
)
{
if
(
get
(
key1
)
==
null
)
{
return
null
;
return
null
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment