Commit 01aec7e1 authored by yihua.huang

extension point of geturl #118

parent ec1c2e8c
......@@ -46,14 +46,18 @@ public class BloomFilterDuplicateRemover implements DuplicateRemover {
/**
 * Checks whether the request's de-duplication key has already been recorded
 * in {@code bloomFilter}, recording it if it has not.
 *
 * <p>The key is obtained through {@link #getUrl(Request)} rather than by
 * calling {@code request.getUrl()} directly, so subclasses that override
 * {@code getUrl} change the key used for both the lookup and the insert.
 *
 * @param request the request whose key is tested
 * @param task    not used by this method
 * @return {@code true} if {@code bloomFilter.mightContain} reported the key
 *         as present; {@code false} if the key was new, in which case it has
 *         been put into the filter and {@code counter} has been incremented
 */
@Override
public boolean isDuplicate(Request request, Task task) {
    // Resolve the key once so the lookup and the insert always agree.
    final String url = getUrl(request);
    boolean isDuplicate = bloomFilter.mightContain(url);
    if (!isDuplicate) {
        bloomFilter.put(url);
        // Only keys seen for the first time are counted.
        counter.incrementAndGet();
    }
    return isDuplicate;
}
/**
 * Extension point that supplies the string used as the de-duplication key
 * for a request. This implementation returns {@code request.getUrl()};
 * subclasses may override it to use a different key.
 *
 * @param request the request being checked
 * @return the key to test against and store in the duplicate filter
 */
protected String getUrl(Request request) {
    final String url = request.getUrl();
    return url;
}
@Override
public void resetDuplicateCheck(Task task) {
rebuildBloomFilter();
......
......@@ -16,7 +16,11 @@ public class HashSetDuplicateRemover implements DuplicateRemover {
/**
 * Checks whether the request's de-duplication key has been seen before,
 * recording it in {@code urls} as a side effect.
 *
 * <p>The key is obtained through {@link #getUrl(Request)} so that subclasses
 * overriding {@code getUrl} control what is compared.
 *
 * @param request the request whose key is tested
 * @param task    not used by this method
 * @return the negation of {@code urls.add(key)}; presumably {@code urls} is a
 *         {@code Set}, so this is {@code true} when the key was already
 *         present (its declaration is outside this view, to be confirmed)
 */
@Override
public boolean isDuplicate(Request request, Task task) {
    final boolean added = urls.add(getUrl(request));
    return !added;
}
/**
 * Extension point that supplies the string used as the de-duplication key
 * for a request. This implementation returns {@code request.getUrl()};
 * subclasses may override it to use a different key.
 *
 * @param request the request being checked
 * @return the key to add to the set of seen URLs
 */
protected String getUrl(Request request) {
    final String url = request.getUrl();
    return url;
}
@Override
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment