Commit 807aefe9 authored by yihua.huang

Switch from EntityUtils to IOUtils because of an encoding error

parent 00b0a751
package us.codecraft.webmagic.downloader;
import com.google.common.collect.Sets;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.annotation.ThreadSafe;
import org.apache.http.client.config.CookieSpecs;
......@@ -8,7 +9,6 @@ import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
......@@ -158,7 +158,7 @@ public class HttpClientDownloader implements Downloader {
}
protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
String content = EntityUtils.toString(httpResponse.getEntity(), charset);
String content = IOUtils.toString(httpResponse.getEntity().getContent(), charset);
Page page = new Page();
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl()));
......
......@@ -16,7 +16,7 @@ import java.util.List;
public class BaiduBaikePageProcesser implements PageProcessor {
private Site site = Site.me()//.setHttpProxy(new HttpHost("127.0.0.1",8888))
.setCharset("utf-8").setRetryTimes(3).setSleepTime(1000).setUseGzip(true);
.setRetryTimes(3).setSleepTime(1000).setUseGzip(true);
@Override
public void process(Page page) {
......
......@@ -4,7 +4,6 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.AfterExtractor;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.UrlTemplate;
import us.codecraft.webmagic.model.direct.Param;
import java.util.ArrayList;
......@@ -12,10 +11,8 @@ import java.util.List;
/**
* @since 0.4.0
* Not implemented yet.
* @author code4crafter@gmail.com
*/
@UrlTemplate("http://baike.baidu.com/search/word?word=${word}&enc=utf8")
public class BaiduBaike implements AfterExtractor{
private String word;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment