Commit ec77eca0 authored by shenjunlin's avatar shenjunlin

优化代码

parent 6448c69f
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
<version>7</version> <version>7</version>
</parent> </parent>
<groupId>us.codecraft.duiba</groupId> <groupId>us.codecraft.duiba</groupId>
<version>0.7.8-SNAPSHOT</version> <version>0.7.8-sjl-SNAPSHOT</version>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging> <packaging>pom</packaging>
<properties> <properties>
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft.duiba</groupId> <groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version> <version>0.7.8-sjl-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -266,4 +266,7 @@ public class Page { ...@@ -266,4 +266,7 @@ public class Page {
", bytes=" + Arrays.toString(bytes) + ", bytes=" + Arrays.toString(bytes) +
'}'; '}';
} }
/**
 * Placeholder hook: the body is currently empty, so calling this method has no effect.
 *
 * <p>NOTE(review): the name suggests it should register extracted objects on the page,
 * but it takes no arguments and stores nothing — confirm the intended behavior before
 * relying on it.
 */
public void addObjects() {
}
} }
package us.codecraft.webmagic; package us.codecraft.webmagic;
import java.util.HashMap; import java.util.*;
import java.util.LinkedHashMap;
import java.util.Map;
/** /**
* Object contains extract results.<br> * Object contains extract results.<br>
...@@ -17,11 +15,14 @@ public class ResultItems { ...@@ -17,11 +15,14 @@ public class ResultItems {
private Map<String, Object> fields = new LinkedHashMap<String, Object>(); private Map<String, Object> fields = new LinkedHashMap<String, Object>();
private List<Object> Objects = new ArrayList<>();
private Request request; private Request request;
private boolean skip; private boolean skip;
public <T> T get(String key) { public <T> T get(String key) {
Object o = fields.get(key); Object o = fields.get(key);
if (o == null) { if (o == null) {
return null; return null;
......
...@@ -8,10 +8,10 @@ ...@@ -8,10 +8,10 @@
</layout> </layout>
</appender> </appender>
<logger name="org.apache" additivity="false"> <!--<logger name="org.apache" additivity="false">-->
<level value="error" /> <!--<level value="error" />-->
<appender-ref ref="stdout" /> <!--<appender-ref ref="stdout" />-->
</logger> <!--</logger>-->
<root> <root>
<level value="info" /> <level value="info" />
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft.duiba</groupId> <groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version> <version>0.7.8-sjl-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
...@@ -56,6 +56,21 @@ ...@@ -56,6 +56,21 @@
</exclusion> </exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.6</version>
</dependency>
<dependency>
<groupId>commons-dbutils</groupId>
<artifactId>commons-dbutils</artifactId>
<version>1.7</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.1.6</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
\ No newline at end of file
package us.codecraft.webmagic.utils;
import com.alibaba.druid.pool.DruidDataSource;
import org.apache.commons.dbutils.QueryRunner;
import java.sql.SQLException;
/**
 * Small helper that executes one preconfigured INSERT statement against a MySQL
 * database, using a Druid connection pool and commons-dbutils {@link QueryRunner}.
 *
 * <p>Usage: construct with the connection details, set the statement once with
 * {@link #setInsertSql(String)}, call {@link #insert(Object...)} for every row, and
 * finally call {@link #close()} so the underlying pool is released.
 *
 * <p>The SQL text is held in a plain mutable field without synchronization; set it
 * before sharing the instance between threads.
 */
public class MySQLTool implements AutoCloseable {

    /** JDBC driver class handed to the connection pool. */
    private static final String DRIVER_CLASS_NAME = "com.mysql.jdbc.Driver";

    private final DruidDataSource dataSource;
    private final QueryRunner queryRunner;
    private String insertSql;

    /**
     * Creates the connection pool and the query runner bound to it.
     *
     * @param url      JDBC URL of the database
     * @param userName database user
     * @param password database password
     */
    public MySQLTool(String url, String userName, String password) {
        dataSource = new DruidDataSource();
        dataSource.setDriverClassName(DRIVER_CLASS_NAME);
        dataSource.setUrl(url);
        dataSource.setUsername(userName);
        dataSource.setPassword(password);
        queryRunner = new QueryRunner(dataSource);
    }

    /**
     * Sets the parameterized INSERT statement used by {@link #insert(Object...)}.
     *
     * @param insertSql SQL text with {@code ?} placeholders
     * @return this instance, to allow call chaining
     */
    public MySQLTool setInsertSql(String insertSql) {
        this.insertSql = insertSql;
        return this;
    }

    /**
     * Executes the configured INSERT statement with the given bind parameters.
     *
     * @param params values bound to the statement's placeholders, in order
     * @throws SQLException if no statement has been configured or the update fails
     */
    public void insert(Object... params) throws SQLException {
        // Fail before touching the pool: without this check the error only surfaces
        // inside QueryRunner, after a connection has already been requested.
        if (insertSql == null) {
            throw new SQLException(
                    "insertSql has not been set; call setInsertSql(...) before insert(...)");
        }
        queryRunner.update(insertSql, params);
    }

    /**
     * Closes the underlying connection pool. The instance must not be used afterwards.
     */
    @Override
    public void close() {
        dataSource.close();
    }
}
...@@ -8,10 +8,10 @@ ...@@ -8,10 +8,10 @@
</layout> </layout>
</appender> </appender>
<logger name="org.apache" additivity="false"> <!--<logger name="org.apache" additivity="false">-->
<level value="error" /> <!--<level value="error" />-->
<appender-ref ref="stdout" /> <!--<appender-ref ref="stdout" />-->
</logger> <!--</logger>-->
<root> <root>
<level value="info" /> <level value="info" />
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft.duiba</groupId> <groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version> <version>0.7.8-sjl-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -8,6 +8,7 @@ import us.codecraft.webmagic.Site; ...@@ -8,6 +8,7 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.HttpClientDownloader; import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.model.samples.AnjuKeVO; import us.codecraft.webmagic.model.samples.AnjuKeVO;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler; import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover; import us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover;
...@@ -53,14 +54,20 @@ public class AnjukeSpider implements PageProcessor { ...@@ -53,14 +54,20 @@ public class AnjukeSpider implements PageProcessor {
String area = StringUtils.substringBetween(areaAddress, "[","]").trim(); String area = StringUtils.substringBetween(areaAddress, "[","]").trim();
String address = StringUtils.substringAfter(areaAddress, "]").trim(); String address = StringUtils.substringAfter(areaAddress, "]").trim();
String price = selectable.$(".li-side > p > strong","text").get().trim(); String price = selectable.$(".li-side > p > strong","text").get().trim();
AnjuKeVO anjuKeVO = new AnjuKeVO(); page.putField("communityName", communityName);
anjuKeVO.setAddress(address); page.putField("areaAddress", areaAddress);
anjuKeVO.setArea(area); page.putField("address", address);
anjuKeVO.setCity(city); page.putField("area", area);
anjuKeVO.setPrice(price); page.putField("price", price);
anjuKeVO.setCommunityName(communityName); page.addObjects();
anjuKeVO.setUrl(page.getRequest().getUrl()); // AnjuKeVO anjuKeVO = new AnjuKeVO();
DBUtils.add(anjuKeVO); // anjuKeVO.setAddress(address);
// anjuKeVO.setArea(area);
// anjuKeVO.setCity(city);
// anjuKeVO.setPrice(price);
// anjuKeVO.setCommunityName(communityName);
// anjuKeVO.setUrl(page.getRequest().getUrl());
// DBUtils.add(anjuKeVO);
} }
page.addTargetRequests(page.getHtml().$(".page-content").links().all()); page.addTargetRequests(page.getHtml().$(".page-content").links().all());
} }
...@@ -78,7 +85,7 @@ public class AnjukeSpider implements PageProcessor { ...@@ -78,7 +85,7 @@ public class AnjukeSpider implements PageProcessor {
.setScheduler(new FileCacheQueueScheduler("spider").setDuplicateRemover(new HashSetDuplicateRemover())) .setScheduler(new FileCacheQueueScheduler("spider").setDuplicateRemover(new HashSetDuplicateRemover()))
.addUrl("https://www.anjuke.com/sy-city.html") .addUrl("https://www.anjuke.com/sy-city.html")
// .addUrl("https://lvliang.anjuke.com/community/p1/") // .addUrl("https://lvliang.anjuke.com/community/p1/")
.thread(2); .thread(2).addPipeline(new ConsolePipeline());
anjuke.start(); anjuke.start();
} }
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft.duiba</groupId> <groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version> <version>0.7.8-sjl-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<parent> <parent>
<groupId>us.codecraft.duiba</groupId> <groupId>us.codecraft.duiba</groupId>
<artifactId>webmagic-parent</artifactId> <artifactId>webmagic-parent</artifactId>
<version>0.7.8-SNAPSHOT</version> <version>0.7.8-sjl-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment