Commit 3c3f0011 authored by yihua.huang

fix charset bug

parent d1fc1cf3
...@@ -31,8 +31,12 @@ public class HttpClientDownloader implements Downloader { ...@@ -31,8 +31,12 @@ public class HttpClientDownloader implements Downloader {
HttpResponse httpResponse = httpClient.execute(httpGet); HttpResponse httpResponse = httpClient.execute(httpGet);
int statusCode = httpResponse.getStatusLine().getStatusCode(); int statusCode = httpResponse.getStatusLine().getStatusCode();
if (site.getAcceptStatCode().contains(statusCode)) { if (site.getAcceptStatCode().contains(statusCode)) {
if (site.getEncoding() == null){
String value = httpResponse.getEntity().getContentType().getValue();
site.setEncoding(new PlainText(value).r("charset=([^\\s]+)").toString());
}
String content = IOUtils.toString(httpResponse.getEntity().getContent(), String content = IOUtils.toString(httpResponse.getEntity().getContent(),
site.getEncoding() == null ? httpResponse.getEntity().getContentType().getValue() : site.getEncoding()); site.getEncoding());
Page page = new Page(); Page page = new Page();
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
page.setUrl(new PlainText(request.getUrl())); page.setUrl(new PlainText(request.getUrl()));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment