Commit ec77eca0 authored by shenjunlin

优化代码

parent 6448c69f
......@@ -6,7 +6,7 @@
<version>7</version>
</parent>
<groupId>us.codecraft.duiba</groupId>
<version>0.7.8-SNAPSHOT</version>
<version>0.7.8-sjl-SNAPSHOT</version>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<properties>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version>
<version>0.7.8-sjl-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -266,4 +266,7 @@ public class Page {
", bytes=" + Arrays.toString(bytes) +
'}';
}
/**
 * Placeholder hook with an empty body: calling it currently has no effect.
 * NOTE(review): presumably meant to collect extracted objects for later
 * processing — confirm the intended behavior and implement it.
 */
public void addObjects() {
}
}
package us.codecraft.webmagic;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.*;
/**
* Object contains extract results.<br>
......@@ -17,11 +15,14 @@ public class ResultItems {
private Map<String, Object> fields = new LinkedHashMap<String, Object>();
private List<Object> Objects = new ArrayList<>();
private Request request;
private boolean skip;
public <T> T get(String key) {
Object o = fields.get(key);
if (o == null) {
return null;
......
......@@ -8,10 +8,10 @@
</layout>
</appender>
<logger name="org.apache" additivity="false">
<level value="error" />
<appender-ref ref="stdout" />
</logger>
<!--<logger name="org.apache" additivity="false">-->
<!--<level value="error" />-->
<!--<appender-ref ref="stdout" />-->
<!--</logger>-->
<root>
<level value="info" />
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version>
<version>0.7.8-sjl-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......@@ -56,6 +56,21 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.6</version>
</dependency>
<dependency>
<groupId>commons-dbutils</groupId>
<artifactId>commons-dbutils</artifactId>
<version>1.7</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.1.6</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package us.codecraft.webmagic.utils;
import com.alibaba.druid.pool.DruidDataSource;
import org.apache.commons.dbutils.QueryRunner;
import java.sql.SQLException;
/**
 * Small helper that runs a pre-configured INSERT statement against a MySQL
 * database through a Druid connection pool and Commons DbUtils.
 *
 * <p>Typical usage: construct the tool, configure the statement once with
 * {@link #setInsertSql(String)}, call {@link #insert(Object...)} for each row,
 * and finally {@link #close()} the tool to release the pooled connections.
 */
public class MySQLTool implements AutoCloseable {

    /** JDBC driver class configured on the pool. */
    private static final String DRIVER_CLASS_NAME = "com.mysql.jdbc.Driver";

    /** Connection pool owned by this tool; released in {@link #close()}. */
    private final DruidDataSource dataSource;

    /** Executes statements using connections borrowed from {@link #dataSource}. */
    private final QueryRunner queryRunner;

    /** Parameterized INSERT statement used by {@link #insert(Object...)}; {@code null} until configured. */
    private String insertSql;

    /**
     * Creates the tool and configures its connection pool.
     *
     * @param url      JDBC URL of the target database
     * @param userName database user name
     * @param password database password
     */
    public MySQLTool(String url, String userName, String password) {
        dataSource = new DruidDataSource();
        dataSource.setDriverClassName(DRIVER_CLASS_NAME);
        dataSource.setUrl(url);
        dataSource.setUsername(userName);
        dataSource.setPassword(password);
        queryRunner = new QueryRunner(dataSource);
    }

    /**
     * Sets the parameterized INSERT statement executed by {@link #insert(Object...)}.
     *
     * @param insertSql SQL text with {@code ?} placeholders
     * @return this instance, to allow call chaining
     */
    public MySQLTool setInsertSql(String insertSql) {
        this.insertSql = insertSql;
        return this;
    }

    /**
     * Executes the configured INSERT statement with the given parameters.
     *
     * @param params values bound, in order, to the statement's placeholders
     * @throws SQLException if no statement has been configured or the database rejects the update
     */
    public void insert(Object... params) throws SQLException {
        // Fail before borrowing a connection, with a message that names the actual mistake.
        if (insertSql == null) {
            throw new SQLException("Insert SQL has not been set; call setInsertSql(...) before insert(...)");
        }
        queryRunner.update(insertSql, params);
    }

    /**
     * Closes the underlying connection pool. The tool must not be used afterwards.
     */
    @Override
    public void close() {
        dataSource.close();
    }
}
......@@ -8,10 +8,10 @@
</layout>
</appender>
<logger name="org.apache" additivity="false">
<level value="error" />
<appender-ref ref="stdout" />
</logger>
<!--<logger name="org.apache" additivity="false">-->
<!--<level value="error" />-->
<!--<appender-ref ref="stdout" />-->
<!--</logger>-->
<root>
<level value="info" />
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version>
<version>0.7.8-sjl-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -8,6 +8,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.model.samples.AnjuKeVO;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
......@@ -53,14 +54,20 @@ public class AnjukeSpider implements PageProcessor {
String area = StringUtils.substringBetween(areaAddress, "[","]").trim();
String address = StringUtils.substringAfter(areaAddress, "]").trim();
String price = selectable.$(".li-side > p > strong","text").get().trim();
AnjuKeVO anjuKeVO = new AnjuKeVO();
anjuKeVO.setAddress(address);
anjuKeVO.setArea(area);
anjuKeVO.setCity(city);
anjuKeVO.setPrice(price);
anjuKeVO.setCommunityName(communityName);
anjuKeVO.setUrl(page.getRequest().getUrl());
DBUtils.add(anjuKeVO);
page.putField("communityName", communityName);
page.putField("areaAddress", areaAddress);
page.putField("address", address);
page.putField("area", area);
page.putField("price", price);
page.addObjects();
// AnjuKeVO anjuKeVO = new AnjuKeVO();
// anjuKeVO.setAddress(address);
// anjuKeVO.setArea(area);
// anjuKeVO.setCity(city);
// anjuKeVO.setPrice(price);
// anjuKeVO.setCommunityName(communityName);
// anjuKeVO.setUrl(page.getRequest().getUrl());
// DBUtils.add(anjuKeVO);
}
page.addTargetRequests(page.getHtml().$(".page-content").links().all());
}
......@@ -78,7 +85,7 @@ public class AnjukeSpider implements PageProcessor {
.setScheduler(new FileCacheQueueScheduler("spider").setDuplicateRemover(new HashSetDuplicateRemover()))
.addUrl("https://www.anjuke.com/sy-city.html")
// .addUrl("https://lvliang.anjuke.com/community/p1/")
.thread(2);
.thread(2).addPipeline(new ConsolePipeline());
anjuke.start();
}
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version>
<version>0.7.8-sjl-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -3,7 +3,7 @@
<parent>
<groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version>
<version>0.7.8-sjl-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment