Commit 32f1f2cf authored by yihua.huang

#613 add charset to page

parent 65049bac
...@@ -113,7 +113,11 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -113,7 +113,11 @@ public class HttpClientDownloader extends AbstractDownloader {
Page page = new Page(); Page page = new Page();
page.setBytes(bytes); page.setBytes(bytes);
if (!request.isBinaryContent()){ if (!request.isBinaryContent()){
page.setRawText(getResponseContent(charset, contentType, bytes)); if (charset == null) {
charset = getHtmlCharset(contentType, bytes);
}
page.setCharset(charset);
page.setRawText(new String(bytes, charset));
} }
page.setUrl(new PlainText(request.getUrl())); page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request); page.setRequest(request);
...@@ -125,21 +129,12 @@ public class HttpClientDownloader extends AbstractDownloader { ...@@ -125,21 +129,12 @@ public class HttpClientDownloader extends AbstractDownloader {
return page; return page;
} }
private String getResponseContent(String charset, String contentType, byte[] bytes) throws IOException { private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
String charset = CharsetUtils.detectCharset(contentType, contentBytes);
if (charset == null) { if (charset == null) {
String htmlCharset = getHtmlCharset(contentType, bytes); charset = Charset.defaultCharset().name();
if (htmlCharset != null) { logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
return new String(bytes, htmlCharset);
} else {
logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
return new String(bytes);
}
} else {
return new String(bytes, charset);
} }
} return charset;
private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
return CharsetUtils.detectCharset(contentType, contentBytes);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment