Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
spider-center
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
周烽
spider-center
Commits
f6f2a2c3
Commit
f6f2a2c3
authored
Mar 11, 2020
by
周烽
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
初始化提交
parents
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
736 additions
and
0 deletions
+736
-0
.gitlab-ci.yml
.gitlab-ci.yml
+37
-0
README.md
README.md
+1
-0
build.gradle
build.gradle
+139
-0
settings.gradle
settings.gradle
+3
-0
build.gradle
spider-center-api/build.gradle
+7
-0
package-info.java
...m/duiba/spider/center/api/remoteservice/package-info.java
+4
-0
build.gradle
spider-center-biz/build.gradle
+43
-0
Application.java
...src/main/java/cn/com/duiba/spider/center/Application.java
+21
-0
XiguaJob.java
...rc/main/java/cn/com/duiba/spider/center/job/XiguaJob.java
+146
-0
XiguaJob2.java
...c/main/java/cn/com/duiba/spider/center/job/XiguaJob2.java
+141
-0
XiguaJob3.java
...c/main/java/cn/com/duiba/spider/center/job/XiguaJob3.java
+114
-0
application.properties
spider-center-biz/src/main/resources/application.properties
+18
-0
bootstrap.properties
spider-center-biz/src/main/resources/bootstrap.properties
+6
-0
logback.xml
spider-center-biz/src/main/resources/logback.xml
+6
-0
default.xml
spider-center-biz/src/main/resources/logback/default.xml
+34
-0
BaseTest.java
...iz/src/test/java/cn/com/duiba/spider/center/BaseTest.java
+16
-0
No files found.
.gitlab-ci.yml
0 → 100755
View file @
f6f2a2c3
# SonarQube preview analysis for feature branches.
# Runs in the `test` stage on runners tagged `test`, only for refs matching /^feature.*$/.
sonarqube_feature_preview:
  script:
    - git config --global user.email "sonar@duiba"
    - git config --global user.name "sonar"
    - gradle clean
    # NOTE(review): a bare `export` prints every exported environment variable
    # into the job log; looks like debugging residue - confirm it is still wanted.
    - export
    # NOTE(review): the Sonar login/password are passed in clear text on the
    # command line; consider moving them to protected CI/CD variables.
    # `-x test` skips the Gradle `test` task during analysis.
    - gradle sonarqube -Dsonar.host.url=http://sonar.dui88.com -Dsonar.branch=dummy -Dsonar.login=reporter -Dsonar.password=duiba123456 -Dsonar.analysis.mode=preview -Dsonar.gitlab.project_id=$CI_PROJECT_PATH -Dsonar.gitlab.commit_sha=$CI_COMMIT_SHA -Dsonar.gitlab.ref_name=$CI_COMMIT_REF_NAME -x test
  stage: test
  only:
    - /^feature.*$/
  tags:
    - test
# SonarQube preview analysis for hotfix and release branches.
# Same script as the feature job; only the `only` ref pattern differs.
sonarqube_hotfix_preview:
  script:
    - git config --global user.email "sonar@duiba"
    - git config --global user.name "sonar"
    - gradle clean
    # NOTE(review): a bare `export` prints every exported environment variable
    # into the job log; looks like debugging residue - confirm it is still wanted.
    - export
    # NOTE(review): the Sonar login/password are passed in clear text on the
    # command line; consider moving them to protected CI/CD variables.
    - gradle sonarqube -Dsonar.host.url=http://sonar.dui88.com -Dsonar.branch=dummy -Dsonar.login=reporter -Dsonar.password=duiba123456 -Dsonar.analysis.mode=preview -Dsonar.gitlab.project_id=$CI_PROJECT_PATH -Dsonar.gitlab.commit_sha=$CI_COMMIT_SHA -Dsonar.gitlab.ref_name=$CI_COMMIT_REF_NAME -x test
  stage: test
  only:
    - /^(hotfix|release).*$/
  tags:
    - test
# SonarQube analysis of the develop branch.
# Unlike the feature/hotfix jobs this one does not pass preview-mode or GitLab
# commit properties, and it declares no explicit `stage`.
sonarqube_develop_preview:
  script:
    - git config --global user.email "sonar@duiba"
    - git config --global user.name "sonar"
    # Analyse the remote-tracking develop ref rather than the ref the job was started for.
    - git checkout origin/develop
    - gradle clean
    # NOTE(review): the Sonar login/password are passed in clear text on the
    # command line; consider moving them to protected CI/CD variables.
    - gradle sonarqube -Dsonar.host.url=http://sonar.dui88.com -Dsonar.login=reporter -Dsonar.password=duiba123456 -x test
  only:
    - develop
  tags:
    - test
README.md
0 → 100755
View file @
f6f2a2c3
# readme
build.gradle
0 → 100755
View file @
f6f2a2c3
// Build-script classpath: shared version properties, plugin repositories and
// the Gradle plugins applied further down in this file.
buildscript {
    // Versions referenced below and by the BOM imports in `subprojects`.
    ext["duibaExtVersion"] = "1.3.31"
    ext["springBootVersion"] = "1.5.22.RELEASE"
    ext["springCloudVersion"] = "Edgware.SR6"
    // NOTE(review): presumably overrides the Hazelcast version managed by the Spring Boot BOM - confirm.
    ext["hazelcast.version"] = "3.11"

    repositories {
        mavenLocal()
        // NOTE(review): plain-http repository URL; artifacts are fetched without TLS.
        maven { url "http://nexus.dui88.com:8081/nexus/content/groups/public/" }
        maven { url "https://plugins.gradle.org/m2/" } //sonarqube
        // NOTE(review): plain-http repository URL; artifacts are fetched without TLS.
        maven { url "http://repo.spring.io/plugins-release" }
        mavenCentral()
    }

    dependencies {
        classpath "io.spring.gradle:dependency-management-plugin:1.0.5.RELEASE"
        classpath "org.sonarsource.scanner.gradle:sonarqube-gradle-plugin:2.5" //sonarqube
        classpath "org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}"
        classpath "org.springframework.build.gradle:propdeps-plugin:0.0.7"
        classpath "cn.com.duiba:duiba-gradle-plugin:0.1.9"
    }
}
// SonarQube analysis is configured once, on the root project.
apply plugin: "org.sonarqube"

sonarqube {
    properties {
        property "sonar.projectName", "spider-center"
        property "sonar.projectKey", "cn.com.duiba:spider-center"
        property "sonar.sourceEncoding", "utf-8"
        // The CI jobs also pass the same host URL via -Dsonar.host.url.
        property "sonar.host.url", "http://sonar.dui88.com"
    }
}
// Applied to the root project and every subproject: common plugins,
// test behaviour and Maven coordinates.
allprojects {
    apply plugin: "duiba.gradle.plugin"
    apply plugin: "maven"
    apply plugin: "java"
    apply plugin: "idea"
    apply plugin: "eclipse"
    apply plugin: "jacoco"
    apply plugin: "io.spring.dependency-management"
    apply plugin: "propdeps"

    test {
        // NOTE(review): failing tests do not fail the build with this setting - confirm this is intended.
        ignoreFailures = true
    }

    group = "cn.com.duiba"
    version = "0.0.1-SNAPSHOT"
}
// Applied to every subproject: Java level, global dependency exclusions,
// managed dependency versions, repositories, publishing and a sources jar.
subprojects {
    sourceCompatibility = 1.8
    targetCompatibility = 1.8

    // Exclude these modules from every configuration.
    configurations {
        all*.exclude group: "log4j", module: "log4j"
        all*.exclude group: "org.slf4j", module: "slf4j-log4j12"
        all*.exclude group: "javax.servlet", module: "servlet-api" //servlet 2.5
        all*.exclude group: "com.alibaba", module: "dubbo"
    }

    // Versions declared here let subprojects omit the version in their own `dependencies` blocks.
    dependencyManagement {
        dependencies {
            imports {
                mavenBom "cn.com.duiba.boot:spring-boot-ext-dependencies:${duibaExtVersion}"
                mavenBom "org.springframework.boot:spring-boot-dependencies:${springBootVersion}"
                mavenBom "org.springframework.cloud:spring-cloud-dependencies:${springCloudVersion}"
            }
            dependency 'io.elasticjob:elastic-job-spring-boot-starter:1.0.0'
            dependency group: 'org.jsoup', name: 'jsoup', version: '1.13.1'
            dependency 'org.projectlombok:lombok:1.16.16'
            dependency group: 'org.seleniumhq.selenium', name: 'selenium-java', version: '3.141.59'
        }
    }

    repositories {
        // NOTE(review): plain-http repository URL; artifacts are fetched without TLS.
        maven { url "http://nexus.dui88.com:8081/nexus/content/groups/public/" }
        mavenCentral()
        mavenLocal()
    }

    // Publishing to the internal Nexus (snapshots and releases) with a generated POM.
    uploadArchives {
        repositories {
            mavenDeployer {
                // NOTE(review): repository credentials are hard-coded in the build script;
                // consider reading them from gradle.properties or the environment.
                snapshotRepository(url: "http://nexus.dui88.com:8081/nexus/content/repositories/snapshots/") {
                    authentication(userName: "admin", password: "admin123")
                }
                repository(url: "http://nexus.dui88.com:8081/nexus/content/repositories/releases/") {
                    authentication(userName: "admin", password: "admin123")
                }
                pom.project {
                    name project.name
                    packaging "jar"
                    description project.name
                    url "www.duiba.com.cn"
                    scm {
                        url ""
                        connection ""
                        developerConnection ""
                    }
                    licenses {
                        license {
                            name "No License"
                            url "http://www.duiba.com.cn"
                            distribution "repo"
                        }
                    }
                    developers {
                        developer {
                            id "xuhengfei"
                            name "Hengfei Xu"
                        }
                    }
                }
            }
        }
    }

    // Packages all main sources into a jar with the "sources" classifier.
    task sourcesJar(type: Jar, dependsOn: classes) {
        classifier = "sources"
        from sourceSets.main.allSource
    }

    // Adds the sources jar to the `archives` artifacts.
    artifacts {
        archives sourcesJar
    }
}
settings.gradle
0 → 100755
View file @
f6f2a2c3
// Multi-project layout: the root project plus the biz (application) and api modules.
rootProject.name = "spider-center"
include ":spider-center-biz"
include ":spider-center-api"
spider-center-api/build.gradle
0 → 100755
View file @
f6f2a2c3
// API module build file.
description = "spider-center-api"

// All dependencies use the `provided` configuration; versions are not given
// here and are expected to come from the root project's dependencyManagement.
dependencies {
    provided "cn.com.duiba:wolf"
    provided "cn.com.duiba.boot:spring-boot-ext-api"
    provided "org.springframework:spring-web"
}
spider-center-api/src/main/java/cn/com/duiba/spider/center/api/remoteservice/package-info.java
0 → 100755
View file @
f6f2a2c3
/**
 * Remote-service interfaces of the spider-center API module.
 *
 * <p>The package is annotated with {@code @AdvancedFeignClient("spider-center")}.
 * NOTE(review): presumably this names the service that Feign clients declared in
 * this package call - confirm against the annotation's documentation.
 */
@AdvancedFeignClient("spider-center")
package cn.com.duiba.spider.center.api.remoteservice;
import
cn.com.duiba.boot.netflix.feign.AdvancedFeignClient
;
spider-center-biz/build.gradle
0 → 100755
View file @
f6f2a2c3
// Application module build file: builds the Spring Boot application.
apply plugin: "org.springframework.boot"

jar {
    baseName = "spider-center"
    // Empty version, so the archive name carries no version suffix.
    version = ""
}

description = "spider-center-biz"

// Versions are omitted here; they are managed in the root build.gradle.
dependencies {
    compile project(":spider-center-api")

    compile "cn.com.duiba.boot:spring-boot-starter-perftest"
    compile "cn.com.duiba.boot:spring-boot-starter-mybatis"
    compile "cn.com.duiba.boot:spring-boot-starter-cat"

    compile "org.springframework.boot:spring-boot-starter-web"
    compile "org.springframework.boot:spring-boot-starter-actuator"
    compile "org.springframework.boot:spring-boot-starter-jdbc"

    compile "org.springframework.cloud:spring-cloud-starter-config"
    compile "org.springframework.cloud:spring-cloud-starter-netflix-eureka-client"
    compile "org.springframework.cloud:spring-cloud-starter-netflix-hystrix"
    compile "org.springframework.cloud:spring-cloud-starter-netflix-ribbon"
    compile "org.springframework.cloud:spring-cloud-starter-openfeign"

    compile "org.apache.commons:commons-dbcp2"
    compile "mysql:mysql-connector-java"

    // Crawler dependencies used by the jobs in cn.com.duiba.spider.center.job.
    compile 'io.elasticjob:elastic-job-spring-boot-starter'
    compile group: 'org.jsoup', name: 'jsoup'
    // NOTE(review): Lombok is on the `compile` configuration, so it is also a runtime dependency.
    compile 'org.projectlombok:lombok'
    compile group: 'org.seleniumhq.selenium', name: 'selenium-java'

    testCompile "org.springframework.boot:spring-boot-starter-test"
}

// This module is not installed or uploaded as a library artifact.
install {
    enabled = false
}

uploadArchives {
    enabled = false
}
spider-center-biz/src/main/java/cn/com/duiba/spider/center/Application.java
0 → 100755
View file @
f6f2a2c3
package
cn
.
com
.
duiba
.
spider
.
center
;
import
org.springframework.boot.SpringApplication
;
import
org.springframework.boot.autoconfigure.SpringBootApplication
;
import
org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration
;
import
org.springframework.cloud.client.circuitbreaker.EnableCircuitBreaker
;
import
org.springframework.cloud.client.discovery.EnableDiscoveryClient
;
import
org.springframework.cloud.netflix.feign.EnableDuibaFeignClients
;
/**
 * Spring Boot entry point of the spider-center application.
 *
 * <p>Enables service discovery, circuit breakers and Duiba Feign clients scanned
 * from {@code cn.com.duiba}. {@link DataSourceAutoConfiguration} is excluded from
 * auto-configuration.
 */
@EnableDiscoveryClient
@EnableCircuitBreaker
@EnableDuibaFeignClients(basePackages = "cn.com.duiba")
@SpringBootApplication(exclude = DataSourceAutoConfiguration.class)
public class Application {

    /**
     * Starts the application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }
}
spider-center-biz/src/main/java/cn/com/duiba/spider/center/job/XiguaJob.java
0 → 100644
View file @
f6f2a2c3
package
cn
.
com
.
duiba
.
spider
.
center
.
job
;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.google.common.collect.Maps
;
import
io.elasticjob.autoconfigure.annotation.ElasticJob
;
import
io.elasticjob.lite.api.ShardingContext
;
import
io.elasticjob.lite.api.simple.SimpleJob
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.lang.StringUtils
;
import
org.jsoup.Connection
;
import
org.jsoup.Jsoup
;
import
java.net.URLEncoder
;
import
java.util.Map
;
/**
 * Elastic-job task that crawls Xigua Video channel pages over plain HTTP (jsoup)
 * and logs the title and URL of the videos found in each channel feed.
 *
 * <p>For every configured channel the job:
 * <ol>
 *   <li>requests the channel page to obtain the session cookies,</li>
 *   <li>calls the verify endpoint with the {@code xiguavideopcwebid} cookie and its signature,</li>
 *   <li>requests the channel page again with those cookies and a challenge referer,</li>
 *   <li>parses the JSON embedded in the {@code SSR_HYDRATED_DATA} script tag.</li>
 * </ol>
 *
 * @author ZhouFeng zhoufeng@duiba.com.cn
 * @version $Id: XiguaJob.java , v 0.1 2020-03-10 10:53 AM ZhouFeng Exp $
 */
@ElasticJob(name = "XiguaJob", cron = "0/10 * * * * ?", description = "简单任务")
@Slf4j
public class XiguaJob implements SimpleJob {

    /** Xigua channels to crawl: channel id -> channel slug (the slug is what the URL uses). */
    private static final Map<String, String> XIGUA_VIDEO_TYPES = Maps.newHashMap();

    /** Channel page URL template; {@code %s} is the channel slug. */
    private static final String XIGUA_VIDEO_URL = "https://www.ixigua.com/channel/%s/";

    /** Verify endpoint template; arguments are the web id cookie and its signature cookie. */
    private static final String INDEX_VERIFY = "https://pc-basic.ixigua.com/api/verify?key=%s&psm=toutiao.fe"
            + ".xigua_video_web_pc&_signature=%s";

    /** Referer sent with the second channel request; arguments are the web id and the URL-encoded channel URL. */
    private static final String INDEX_REFERER = "https://pc-basic.ixigua.com/challenge?key=%s&psm=toutiao.fe"
            + ".xigua_video_web_pc&from=%s";

    /** Prefix of a video page URL; the video id is appended directly. */
    private static final String XIGUA_VIDEO_HOST = "https://www.ixigua.com/i";

    /** Opening tag of the script element that carries the server-side-rendered page data. */
    private static final String SSR_DATA_OPEN = "<script type=\"application/json\" id=\"SSR_HYDRATED_DATA\">";

    /** Closing tag of the script element. */
    private static final String SSR_DATA_CLOSE = "</script>";

    /** Upper bound on the number of feed cards reported per channel. */
    private static final int MAX_CARDS_PER_CHANNEL = 25;

    /** Request headers sent with every HTTP call. */
    protected static final Map<String, String> HEADERS = Maps.newHashMap();

    static {
        // Known channels. Only "technology" is currently enabled; the others are kept for reference.
        // film & TV
        // XIGUA_VIDEO_TYPES.put("61887739373", "yingshi");
        // variety shows
        // XIGUA_VIDEO_TYPES.put("61887739345", "zongyi");
        // rural life
        // XIGUA_VIDEO_TYPES.put("61887739390", "nongren");
        // food
        // XIGUA_VIDEO_TYPES.put("6141508391", "meishi");
        // music
        // XIGUA_VIDEO_TYPES.put("61887739368", "yinyue");
        // pets
        // XIGUA_VIDEO_TYPES.put("6141508406", "chongwu");
        // comedy
        // XIGUA_VIDEO_TYPES.put("61887739369", "gaoxiao");
        // fashion
        // XIGUA_VIDEO_TYPES.put("6141508390", "shishang");
        // Dongchedi (cars)
        // XIGUA_VIDEO_TYPES.put("7005980951", "dongchedi");
        // sports
        // XIGUA_VIDEO_TYPES.put("5798809184", "tiyu");
        // entertainment
        // XIGUA_VIDEO_TYPES.put("61887739374", "yule");
        // culture
        // XIGUA_VIDEO_TYPES.put("6141508399", "wenhua");
        // handicraft
        // XIGUA_VIDEO_TYPES.put("94349531488", "shougong");
        // technology
        XIGUA_VIDEO_TYPES.put("6141508396", "keji");
        // square dancing
        // XIGUA_VIDEO_TYPES.put("61887739388", "guangchangwu");
        // parenting
        // XIGUA_VIDEO_TYPES.put("61887739344", "qinzi");
        // nba
        // XIGUA_VIDEO_TYPES.put("94349530916", "nba");
        // vlog
        // XIGUA_VIDEO_TYPES.put("94349533351", "vlog");

        HEADERS.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                + "Chrome/74.0.3729.169 Safari/537.36");
    }

    /**
     * Crawls every configured channel once.
     *
     * @param shardingContext sharding information supplied by elastic-job; not used
     * @throws RuntimeException if a request fails or a channel page cannot be parsed
     */
    @Override
    public void execute(ShardingContext shardingContext) {
        for (String channel : XIGUA_VIDEO_TYPES.values()) {
            try {
                crawlChannel(channel);
            } catch (java.io.IOException e) {
                throw new RuntimeException("Failed to crawl Xigua channel " + channel, e);
            }
        }
    }

    /**
     * Fetches one channel page and logs up to {@link #MAX_CARDS_PER_CHANNEL} videos from its feed.
     *
     * @param channel channel slug inserted into {@link #XIGUA_VIDEO_URL}
     * @throws java.io.IOException if any of the HTTP requests fails
     */
    private void crawlChannel(String channel) throws java.io.IOException {
        String url = String.format(XIGUA_VIDEO_URL, channel);

        // First request: only the cookies of the response are needed.
        Map<String, String> cookies = newGet(url).execute().cookies();
        String webId = cookies.get("xiguavideopcwebid");
        String webIdSignature = cookies.get("xiguavideopcwebid.sig");

        // Validate the cookies; the response itself is not used.
        newGet(String.format(INDEX_VERIFY, webId, webIdSignature)).execute();

        // Second request: same page, now with the cookies and the challenge referer.
        String referer = String.format(INDEX_REFERER, webId, URLEncoder.encode(url, "UTF-8"));
        String body = newGet(url).header("referer", referer).cookies(cookies).execute().body();

        JSONArray cards = extractCards(body);
        int limit = Math.min(cards.size(), MAX_CARDS_PER_CHANNEL);
        for (int j = 0; j < limit; j++) {
            JSONObject card = cards.getJSONObject(j);
            String videoUrl = XIGUA_VIDEO_HOST + card.getString("videoId");
            String videoTitle = card.getString("videoTitle");
            log.info("视频标题:[{}],视频URL:{}", videoTitle, videoUrl);
        }
    }

    /**
     * Builds a GET request with the shared headers that tolerates HTTP error
     * statuses and non-HTML content types.
     */
    private static Connection newGet(String url) {
        return Jsoup.connect(url)
                .headers(HEADERS)
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .method(Connection.Method.GET);
    }

    /**
     * Extracts the feed cards from the JSON embedded in the channel page.
     *
     * @param body HTML of the channel page
     * @return the {@code channelFeedData} array of the first {@code ChannelFeedV2} entry,
     *         or an empty array when the page carries no feed data
     * @throws IllegalStateException if the page does not contain the {@code SSR_HYDRATED_DATA} script
     */
    private static JSONArray extractCards(String body) {
        String json = StringUtils.substringBetween(body, SSR_DATA_OPEN, SSR_DATA_CLOSE);
        if (json == null) {
            // Without this guard the failure surfaced as an opaque ArrayIndexOutOfBoundsException.
            throw new IllegalStateException("SSR_HYDRATED_DATA script not found in channel page");
        }

        JSONObject data = JSON.parseObject(json);
        if (data == null) {
            return new JSONArray();
        }
        JSONArray feeds = data.getJSONArray("ChannelFeedV2");
        if (CollectionUtils.isEmpty(feeds)) {
            return new JSONArray();
        }
        JSONObject feed = feeds.getJSONObject(0);
        if (feed == null || feed.isEmpty()) {
            return new JSONArray();
        }
        JSONArray cards = feed.getJSONArray("channelFeedData");
        return cards == null ? new JSONArray() : cards;
    }
}
spider-center-biz/src/main/java/cn/com/duiba/spider/center/job/XiguaJob2.java
0 → 100644
View file @
f6f2a2c3
package
cn
.
com
.
duiba
.
spider
.
center
.
job
;
import
com.google.common.collect.Maps
;
import
io.elasticjob.autoconfigure.annotation.ElasticJob
;
import
io.elasticjob.lite.api.ShardingContext
;
import
io.elasticjob.lite.api.simple.SimpleJob
;
import
org.openqa.selenium.By
;
import
org.openqa.selenium.WebElement
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.TimeUnit
;
/**
 * Elastic-job task that crawls Xigua Video channel pages with two Chrome
 * browsers driven by Selenium: one lists the channel feed, the other opens each
 * video page and reads the {@code src} of its {@code <video>} element.
 *
 * <p>The job is registered as {@code "XiguaJob2"}; it previously reused the name
 * {@code "XiguaJob"}, which is already taken by {@code XiguaJob}.
 *
 * @author ZhouFeng zhoufeng@duiba.com.cn
 * @version $Id: XiguaJob2.java , v 0.1 2020-03-10 10:53 AM ZhouFeng Exp $
 */
@ElasticJob(name = "XiguaJob2", cron = "0/10 * * * * ?", description = "简单任务")
public class XiguaJob2 implements SimpleJob {

    /** Xigua channels to crawl: channel id -> channel slug (the slug is what the URL uses). */
    private static final Map<String, String> XIGUA_VIDEO_TYPES = Maps.newHashMap();

    /** Channel page URL template; {@code %s} is the channel slug. */
    private static final String XIGUA_VIDEO_URL = "https://www.ixigua.com/channel/%s/";

    /** Implicit wait, in seconds, applied to element lookups in both browsers. */
    private static final long IMPLICIT_WAIT_SECONDS = 5;

    /** Request headers; not used by this class itself but kept because the field is protected. */
    protected static final Map<String, String> HEADERS = Maps.newHashMap();

    static {
        // Known channels. Only "technology" is currently enabled; the others are kept for reference.
        // film & TV
        // XIGUA_VIDEO_TYPES.put("61887739373", "yingshi");
        // variety shows
        // XIGUA_VIDEO_TYPES.put("61887739345", "zongyi");
        // rural life
        // XIGUA_VIDEO_TYPES.put("61887739390", "nongren");
        // food
        // XIGUA_VIDEO_TYPES.put("6141508391", "meishi");
        // music
        // XIGUA_VIDEO_TYPES.put("61887739368", "yinyue");
        // pets
        // XIGUA_VIDEO_TYPES.put("6141508406", "chongwu");
        // comedy
        // XIGUA_VIDEO_TYPES.put("61887739369", "gaoxiao");
        // fashion
        // XIGUA_VIDEO_TYPES.put("6141508390", "shishang");
        // Dongchedi (cars)
        // XIGUA_VIDEO_TYPES.put("7005980951", "dongchedi");
        // sports
        // XIGUA_VIDEO_TYPES.put("5798809184", "tiyu");
        // entertainment
        // XIGUA_VIDEO_TYPES.put("61887739374", "yule");
        // culture
        // XIGUA_VIDEO_TYPES.put("6141508399", "wenhua");
        // handicraft
        // XIGUA_VIDEO_TYPES.put("94349531488", "shougong");
        // technology
        XIGUA_VIDEO_TYPES.put("6141508396", "keji");
        // square dancing
        // XIGUA_VIDEO_TYPES.put("61887739388", "guangchangwu");
        // parenting
        // XIGUA_VIDEO_TYPES.put("61887739344", "qinzi");
        // nba
        // XIGUA_VIDEO_TYPES.put("94349530916", "nba");
        // vlog
        // XIGUA_VIDEO_TYPES.put("94349533351", "vlog");

        HEADERS.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                + "Chrome/74.0.3729.169 Safari/537.36");
    }

    /**
     * Runs the job once outside the scheduler, for manual testing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        XiguaJob2 job = new XiguaJob2();
        job.execute(null);
    }

    /**
     * Crawls every configured channel once.
     *
     * @param shardingContext sharding information supplied by elastic-job; not used, may be {@code null}
     * @throws RuntimeException if the crawl is interrupted or a Selenium call fails
     */
    @Override
    public void execute(ShardingContext shardingContext) {
        for (String channel : XIGUA_VIDEO_TYPES.values()) {
            crawlChannel(String.format(XIGUA_VIDEO_URL, channel));
        }
    }

    /**
     * Opens one channel page, visits every feed item that has a title link and
     * prints the video title and media URL.
     *
     * @param channelUrl URL of the channel page
     */
    private void crawlChannel(String channelUrl) {
        ChromeDriver driver = new ChromeDriver();
        try {
            ChromeDriver videoDriver = new ChromeDriver();
            try {
                // Each browser needs its own implicit wait; the original set it on `driver` twice.
                driver.manage().timeouts().implicitlyWait(IMPLICIT_WAIT_SECONDS, TimeUnit.SECONDS);
                videoDriver.manage().timeouts().implicitlyWait(IMPLICIT_WAIT_SECONDS, TimeUnit.SECONDS);

                driver.get(channelUrl);
                List<WebElement> items = driver.findElementsByClassName("FeedContainer__itemWrapper");
                for (WebElement item : items) {
                    // findElement throws when nothing matches, so use findElements to skip
                    // items that have no title link instead of aborting the crawl.
                    List<WebElement> titleLinks = item.findElements(By.className("HorizontalFeedCard__title"));
                    if (titleLinks.isEmpty()) {
                        continue;
                    }
                    WebElement titleLink = titleLinks.get(0);
                    String subUrl = titleLink.getAttribute("href");
                    String title = titleLink.getAttribute("title");

                    videoDriver.get(subUrl);
                    WebElement video = videoDriver.findElementByClassName("playerContainer")
                            .findElement(By.tagName("video"));
                    String videoUrl = video.getAttribute("src");
                    System.out.println("标题:" + title + " URL:" + videoUrl);

                    // Pause between video pages.
                    TimeUnit.SECONDS.sleep(1);
                }
            } finally {
                // quit() ends the whole browser session; close() only closes the current window.
                videoDriver.quit();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } finally {
            driver.quit();
        }
    }
}
spider-center-biz/src/main/java/cn/com/duiba/spider/center/job/XiguaJob3.java
0 → 100644
View file @
f6f2a2c3
package
cn
.
com
.
duiba
.
spider
.
center
.
job
;
import
io.elasticjob.lite.api.ShardingContext
;
import
io.elasticjob.lite.api.simple.SimpleJob
;
import
org.apache.commons.lang3.RandomUtils
;
import
org.openqa.selenium.By
;
import
org.openqa.selenium.WebElement
;
import
org.openqa.selenium.chrome.ChromeDriver
;
import
org.openqa.selenium.chrome.ChromeDriverService
;
import
org.openqa.selenium.chrome.ChromeOptions
;
import
org.openqa.selenium.interactions.HasTouchScreen
;
import
org.openqa.selenium.interactions.TouchScreen
;
import
org.openqa.selenium.interactions.touch.ScrollAction
;
import
org.openqa.selenium.remote.RemoteExecuteMethod
;
import
org.openqa.selenium.remote.RemoteTouchScreen
;
import
org.openqa.selenium.remote.RemoteWebDriver
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.TimeUnit
;
/**
 * Crawler for the mobile Xigua Video site ({@code https://m.ixigua.com/}) using
 * headless Chrome with iPhone X emulation. One browser scrolls the home feed until
 * enough items are loaded; a second browser opens each video page and reads the
 * {@code src} of its first {@code <source>} element.
 *
 * <p>The scheduling annotation below is commented out, so this class is only run
 * through {@link #main(String[])} unless it is registered elsewhere.
 *
 * @author ZhouFeng zhoufeng@duiba.com.cn
 * @version $Id: XiguaJob3.java , v 0.1 2020-03-10 10:53 AM ZhouFeng Exp $
 */
//@ElasticJob(name = "XiguaJob", cron = "0/10 * * * * ?", description = "简单任务")
public class XiguaJob3 implements SimpleJob {

    /** logger */
    private static final Logger LOGGER = LoggerFactory.getLogger(XiguaJob3.class);

    /** Scrolling stops once at least this many feed items are present. */
    private static final int MIN_ITEMS = 10;

    /** Upper bound on scroll attempts, so a feed that never fills cannot hang the job. */
    private static final int MAX_SCROLL_ATTEMPTS = 50;

    /** Implicit wait, in seconds, applied to element lookups in both browsers. */
    private static final long IMPLICIT_WAIT_SECONDS = 5;

    /**
     * Runs the crawler once outside the scheduler, for manual testing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        XiguaJob3 job = new XiguaJob3();
        job.execute(null);
    }

    /**
     * Crawls the mobile home feed once. Failures during the crawl are logged and
     * not rethrown; both browsers are always shut down.
     *
     * @param shardingContext sharding information supplied by elastic-job; not used, may be {@code null}
     */
    @Override
    public void execute(ShardingContext shardingContext) {
        ChromeOptions options = new ChromeOptions();
        Map<String, String> mobileEmulation = new HashMap<>();
        mobileEmulation.put("deviceName", "iPhone X");
        options.setExperimentalOption("mobileEmulation", mobileEmulation);
        options.addArguments("--headless");

        RemoteWebDriver driver = new ChromeDriverWithTouchScreen(options);
        try {
            RemoteWebDriver videoDriver = new ChromeDriverWithTouchScreen(options);
            try {
                crawl(driver, videoDriver);
            } finally {
                // quit() ends the whole browser session; close() only closes the current window.
                videoDriver.quit();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can see the interruption.
            Thread.currentThread().interrupt();
            LOGGER.error("爬取西瓜视频失败", e);
        } catch (Exception e) {
            LOGGER.error("爬取西瓜视频失败", e);
        } finally {
            driver.quit();
        }
    }

    /**
     * Loads the feed, scrolls until enough items are present (or the attempt limit
     * is reached) and logs title and media URL of every item found.
     *
     * @param driver      browser used for the feed page
     * @param videoDriver browser used for the individual video pages
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    private void crawl(RemoteWebDriver driver, RemoteWebDriver videoDriver) throws InterruptedException {
        driver.manage().timeouts().implicitlyWait(IMPLICIT_WAIT_SECONDS, TimeUnit.SECONDS);
        videoDriver.manage().timeouts().implicitlyWait(IMPLICIT_WAIT_SECONDS, TimeUnit.SECONDS);

        driver.get("https://m.ixigua.com/");

        List<WebElement> items = Collections.emptyList();
        for (int attempt = 0; attempt < MAX_SCROLL_ATTEMPTS && items.size() < MIN_ITEMS; attempt++) {
            items = driver.findElementsByClassName("has_action");
            // Scroll by a random offset and pause for a random interval between lookups.
            pullUp(driver, RandomUtils.nextInt(800, 1500));
            TimeUnit.MILLISECONDS.sleep(RandomUtils.nextInt(200, 600));
        }
        if (items.size() < MIN_ITEMS) {
            LOGGER.warn("Only {} feed items found after {} scroll attempts", items.size(), MAX_SCROLL_ATTEMPTS);
        }

        for (WebElement item : items) {
            String id = item.getAttribute("data-group-id");
            String subUrl = "https://m.ixigua.com/i" + id;
            String title = item.findElement(By.tagName("h3")).getText();

            videoDriver.get(subUrl);
            List<WebElement> sources = videoDriver.findElementsByTagName("source");
            if (!sources.isEmpty()) {
                String videoUrl = sources.get(0).getAttribute("src");
                LOGGER.info("标题:{} URL:{}", title, videoUrl);
                TimeUnit.MILLISECONDS.sleep(RandomUtils.nextInt(300, 800));
            }
        }
    }

    /**
     * Performs a touch scroll on the page.
     *
     * @param driver browser to scroll; must implement {@link HasTouchScreen}
     * @param offset vertical scroll offset passed to {@link ScrollAction}
     */
    private void pullUp(RemoteWebDriver driver, int offset) {
        TouchScreen touchScreen = ((HasTouchScreen) driver).getTouch();
        new ScrollAction(touchScreen, 0, offset).perform();
    }

    /**
     * {@link ChromeDriver} that exposes a touch screen so that touch scroll actions
     * can be sent to the browser. Declared static because it needs no reference to
     * the enclosing job instance.
     */
    static class ChromeDriverWithTouchScreen extends ChromeDriver implements HasTouchScreen {

        /**
         * @param options Chrome options used to start the browser
         */
        public ChromeDriverWithTouchScreen(ChromeOptions options) {
            super(ChromeDriverService.createDefaultService(), options);
        }

        @Override
        public TouchScreen getTouch() {
            return new RemoteTouchScreen(new RemoteExecuteMethod(this));
        }
    }
}
spider-center-biz/src/main/resources/application.properties
0 → 100755
View file @
f6f2a2c3
# tomcat
server.port=1111
server.tomcat.access-log-enabled=false
server.tomcat.uri-encoding=UTF-8
# logging
logging.path=${user.home}/logs/spider-center
# database
# NOTE(review): database credentials are committed in clear text; consider supplying them from the config server or the environment.
duiba.datasource.demo.url=jdbc:mysql://dev.config.duibar.com:3306/demo?createDatabaseIfNotExist=true&useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true&connectTimeout=5000&socketTimeout=60000
duiba.datasource.demo.username=dev
duiba.datasource.demo.password=dev_fas015
# feign serialization; hessian2 is the default
duiba.feign.serialization=hessian2
# elastic-job: ZooKeeper server list
# NOTE(review): points at localhost; confirm this is overridden outside local development.
elastic.job.zk.serverLists=127.0.0.1:2181
spider-center-biz/src/main/resources/bootstrap.properties
0 → 100755
View file @
f6f2a2c3
# Bootstrap configuration: application name and Spring Cloud Config server.
spring.application.name=spider-center
spring.cloud.config.uri=http://configserver.dui88.com
# Startup fails if the config server cannot be reached.
spring.cloud.config.fail-fast=true
# NOTE(review): the active profile is fixed to "dev" here; confirm it is overridden per environment.
spring.profiles.active=dev
spring.devtools.restart.enabled=true
spider-center-biz/src/main/resources/logback.xml
0 → 100755
View file @
f6f2a2c3
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback entry point: pulls in Spring Boot's defaults, then the project's own appenders and loggers. -->
<configuration>
    <include resource="org/springframework/boot/logging/logback/defaults.xml"/>
    <include resource="logback/default.xml"/>
</configuration>
\ No newline at end of file
spider-center-biz/src/main/resources/logback/default.xml
0 → 100755
View file @
f6f2a2c3
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project logging setup included from logback.xml: a console appender and a daily-rolling file appender. -->
<included>
    <!-- Log directory: LOG_PATH, else LOG_TEMP, else java.io.tmpdir, else /tmp. -->
    <property name="logpath" value="${LOG_PATH:-${LOG_TEMP:-${java.io.tmpdir:-/tmp}}/}"/>
    <property name="logPattern" value="%d{HH:mm:ss.SSS} %-5level [%thread] %logger{32}[%file:%line] -> %msg%n"/>

    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${logPattern}</pattern>
            <charset class="java.nio.charset.Charset">utf8</charset>
        </encoder>
    </appender>

    <!-- One file per day; maxHistory is set to 10. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <File>${logpath}/application.log</File>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <FileNamePattern>${logpath}/application_%d{yyyy-MM-dd}.log</FileNamePattern>
            <maxHistory>10</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${logPattern}</pattern>
        </encoder>
    </appender>

    <!-- Packages whose logs should be recorded -->
    <logger name="cn.com.duiba" level="INFO"/>
    <!-- NOTE(review): exact duplicate of the logger above; possibly meant for a different package. -->
    <logger name="cn.com.duiba" level="INFO"/>

    <root level="WARN">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
    </root>
</included>
spider-center-biz/src/test/java/cn/com/duiba/spider/center/BaseTest.java
0 → 100755
View file @
f6f2a2c3
package
cn
.
com
.
duiba
.
spider
.
center
;
import
org.junit.runner.RunWith
;
import
org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc
;
import
org.springframework.boot.test.context.SpringBootTest
;
import
org.springframework.test.context.junit4.SpringJUnit4ClassRunner
;
/**
 * Base class for integration tests. Subclasses run with the Spring JUnit 4 runner
 * against the full {@link Application} context on a random port, with MockMvc
 * auto-configured.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@SpringBootTest(
        classes = Application.class,
        webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@AutoConfigureMockMvc
// @Transactional("demo")
public abstract class BaseTest {
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment