Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webmagic
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
沈俊林
webmagic
Commits
0e98183f
Commit
0e98183f
authored
Feb 12, 2014
by
yihua.huang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Change log4j to slf4j #55
parent
fa33b158
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
42 additions
and
29 deletions
+42
-29
HttpClientDownloader.java
...s/codecraft/webmagic/downloader/HttpClientDownloader.java
+3
-2
FilePipeline.java
...ain/java/us/codecraft/webmagic/pipeline/FilePipeline.java
+3
-2
PriorityScheduler.java
...va/us/codecraft/webmagic/scheduler/PriorityScheduler.java
+4
-5
QueueScheduler.java
.../java/us/codecraft/webmagic/scheduler/QueueScheduler.java
+4
-2
Html.java
...re/src/main/java/us/codecraft/webmagic/selector/Html.java
+3
-2
PropertyLoader.java
...va/us/codecraft/webmagic/configurable/PropertyLoader.java
+6
-3
FileCache.java
...main/java/us/codecraft/webmagic/downloader/FileCache.java
+3
-2
GithubRepoApi.java
...ain/java/us/codecraft/webmagic/example/GithubRepoApi.java
+1
-1
PageModelExtractor.java
.../java/us/codecraft/webmagic/model/PageModelExtractor.java
+3
-2
FilePageModelPipeline.java
...us/codecraft/webmagic/pipeline/FilePageModelPipeline.java
+3
-2
JsonFilePageModelPipeline.java
...odecraft/webmagic/pipeline/JsonFilePageModelPipeline.java
+3
-2
JsonFilePipeline.java
...java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java
+3
-2
FileCacheQueueScheduler.java
...codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
+3
-2
No files found.
webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
View file @
0e98183f
...
...
@@ -10,7 +10,8 @@ import org.apache.http.client.methods.CloseableHttpResponse;
import
org.apache.http.client.methods.RequestBuilder
;
import
org.apache.http.impl.client.CloseableHttpClient
;
import
org.apache.http.util.EntityUtils
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Site
;
...
...
@@ -34,7 +35,7 @@ import java.util.Set;
@ThreadSafe
public
class
HttpClientDownloader
implements
Downloader
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
private
final
Map
<
String
,
CloseableHttpClient
>
httpClients
=
new
HashMap
<
String
,
CloseableHttpClient
>();
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java
View file @
0e98183f
...
...
@@ -2,7 +2,8 @@ package us.codecraft.webmagic.pipeline;
import
org.apache.commons.codec.digest.DigestUtils
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.ResultItems
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.utils.FilePersistentBase
;
...
...
@@ -21,7 +22,7 @@ import java.util.Map;
@ThreadSafe
public
class
FilePipeline
extends
FilePersistentBase
implements
Pipeline
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
/**
* create a FilePipeline with default path"/data/webmagic/"
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
View file @
0e98183f
package
us
.
codecraft
.
webmagic
.
scheduler
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.utils.NumberUtils
;
...
...
@@ -24,7 +25,7 @@ public class PriorityScheduler implements Scheduler {
public
static
final
int
INITIAL_CAPACITY
=
5
;
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
private
BlockingQueue
<
Request
>
noPriorityQueue
=
new
LinkedBlockingQueue
<
Request
>();
...
...
@@ -46,9 +47,7 @@ public class PriorityScheduler implements Scheduler {
@Override
public
synchronized
void
push
(
Request
request
,
Task
task
)
{
if
(
logger
.
isDebugEnabled
())
{
logger
.
debug
(
"push to queue "
+
request
.
getUrl
());
}
if
(
urls
.
add
(
request
.
getUrl
()))
{
if
(
request
.
getPriority
()
==
0
)
{
noPriorityQueue
.
add
(
request
);
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java
View file @
0e98183f
package
us
.
codecraft
.
webmagic
.
scheduler
;
import
org.apache.http.annotation.ThreadSafe
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
...
...
@@ -10,6 +11,7 @@ import java.util.Set;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.LinkedBlockingQueue
;
/**
* Basic Scheduler implementation.<br>
* Store urls to fetch in LinkedBlockingQueue and remove duplicate urls by HashMap.
...
...
@@ -20,7 +22,7 @@ import java.util.concurrent.LinkedBlockingQueue;
@ThreadSafe
public
class
QueueScheduler
implements
Scheduler
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
private
BlockingQueue
<
Request
>
queue
=
new
LinkedBlockingQueue
<
Request
>();
...
...
webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
View file @
0e98183f
package
us
.
codecraft
.
webmagic
.
selector
;
import
org.apache.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.utils.EnvironmentUtil
;
import
java.util.ArrayList
;
...
...
@@ -16,7 +17,7 @@ import java.util.List;
*/
public
class
Html
extends
PlainText
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
/**
* Store parsed document for better performance when only one text exist.
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/PropertyLoader.java
View file @
0e98183f
...
...
@@ -5,11 +5,14 @@ import us.codecraft.webmagic.processor.PageProcessor;
import
java.util.Map
;
/**
* Inject property to object by {@link Inject} annotation.
*
* @author yihua.huang@dianping.com
*/
public
interface
PropertyLoader
<
T
>
{
public
class
PropertyLoader
<
T
>
{
PropertyLoader
<
T
>
clazz
(
Class
<?>
clazz
);
public
T
load
(
T
object
,
Map
<
String
,
String
>
properties
)
{
return
object
;
}
T
load
(
Map
<
String
,
String
>
properties
);
}
webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java
View file @
0e98183f
...
...
@@ -2,7 +2,8 @@ package us.codecraft.webmagic.downloader;
import
org.apache.commons.codec.digest.DigestUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.*
;
import
us.codecraft.webmagic.utils.Experimental
;
import
us.codecraft.webmagic.pipeline.Pipeline
;
...
...
@@ -28,7 +29,7 @@ public class FileCache extends FilePersistentBase implements Downloader, Pipelin
private
final
PageProcessor
pageProcessor
;
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
public
FileCache
(
String
startUrl
,
String
urlPattern
)
{
this
(
startUrl
,
urlPattern
,
"/data/webmagic/temp/"
);
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/example/GithubRepoApi.java
View file @
0e98183f
...
...
@@ -27,7 +27,7 @@ public class GithubRepoApi implements HasKey {
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$.stargazers_count"
)
private
int
star
;
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$.
forks_count
"
)
@ExtractBy
(
type
=
ExtractBy
.
Type
.
JsonPath
,
value
=
"$.
homepage
"
)
private
int
fork
;
@ExtractByUrl
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
View file @
0e98183f
package
us
.
codecraft
.
webmagic
.
model
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Page
;
import
us.codecraft.webmagic.model.annotation.*
;
import
us.codecraft.webmagic.model.formatter.BasicTypeFormatter
;
...
...
@@ -40,7 +41,7 @@ class PageModelExtractor {
private
Extractor
objectExtractor
;
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
public
static
PageModelExtractor
create
(
Class
clazz
)
{
PageModelExtractor
pageModelExtractor
=
new
PageModelExtractor
();
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/FilePageModelPipeline.java
View file @
0e98183f
...
...
@@ -2,7 +2,8 @@ package us.codecraft.webmagic.pipeline;
import
org.apache.commons.codec.digest.DigestUtils
;
import
org.apache.commons.lang3.builder.ToStringBuilder
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.model.HasKey
;
import
us.codecraft.webmagic.utils.FilePersistentBase
;
...
...
@@ -21,7 +22,7 @@ import java.io.PrintWriter;
*/
public
class
FilePageModelPipeline
extends
FilePersistentBase
implements
PageModelPipeline
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
/**
* new JsonFilePageModelPipeline with default path "/data/webmagic/"
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline.java
View file @
0e98183f
...
...
@@ -3,7 +3,8 @@ package us.codecraft.webmagic.pipeline;
import
com.alibaba.fastjson.JSON
;
import
org.apache.commons.codec.digest.DigestUtils
;
import
org.apache.commons.lang3.builder.ToStringBuilder
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.model.HasKey
;
import
us.codecraft.webmagic.utils.FilePersistentBase
;
...
...
@@ -22,7 +23,7 @@ import java.io.PrintWriter;
*/
public
class
JsonFilePageModelPipeline
extends
FilePersistentBase
implements
PageModelPipeline
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
/**
* new JsonFilePageModelPipeline with default path "/data/webmagic/"
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/JsonFilePipeline.java
View file @
0e98183f
...
...
@@ -2,7 +2,8 @@ package us.codecraft.webmagic.pipeline;
import
com.alibaba.fastjson.JSON
;
import
org.apache.commons.codec.digest.DigestUtils
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.ResultItems
;
import
us.codecraft.webmagic.Task
;
import
us.codecraft.webmagic.utils.FilePersistentBase
;
...
...
@@ -20,7 +21,7 @@ import java.io.PrintWriter;
*/
public
class
JsonFilePipeline
extends
FilePersistentBase
implements
Pipeline
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
/**
* new JsonFilePageModelPipeline with default path "/data/webmagic/"
...
...
webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
View file @
0e98183f
...
...
@@ -2,7 +2,8 @@ package us.codecraft.webmagic.scheduler;
import
org.apache.commons.io.IOUtils
;
import
org.apache.commons.lang3.math.NumberUtils
;
import
org.apache.log4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
us.codecraft.webmagic.Request
;
import
us.codecraft.webmagic.Task
;
...
...
@@ -24,7 +25,7 @@ import java.util.concurrent.atomic.AtomicInteger;
*/
public
class
FileCacheQueueScheduler
implements
Scheduler
{
private
Logger
logger
=
Logger
.
getLogger
(
getClass
());
private
Logger
logger
=
Logger
Factory
.
getLogger
(
getClass
());
private
String
filePath
=
System
.
getProperty
(
"java.io.tmpdir"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment