Commit 3266ea15 authored by yihua.huang

#629 correct illegal url in HttpUriRequestConverter

parent 5daf92e8
...@@ -58,7 +58,7 @@ public class HttpUriRequestConverter { ...@@ -58,7 +58,7 @@ public class HttpUriRequestConverter {
} }
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) { private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl()); RequestBuilder requestBuilder = selectRequestMethod(request).setUri(UrlUtils.fixIllegalCharacterInUrl(request.getUrl()));
if (site.getHeaders() != null) { if (site.getHeaders() != null) {
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) { for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue()); requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
......
...@@ -43,7 +43,7 @@ public class UrlUtils { ...@@ -43,7 +43,7 @@ public class UrlUtils {
if (url.startsWith("?")) if (url.startsWith("?"))
url = base.getPath() + url; url = base.getPath() + url;
URL abs = new URL(base, url); URL abs = new URL(base, url);
return encodeIllegalCharacterInUrl(abs.toExternalForm()); return abs.toExternalForm();
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
return ""; return "";
} }
...@@ -53,12 +53,17 @@ public class UrlUtils { ...@@ -53,12 +53,17 @@ public class UrlUtils {
* *
* @param url url * @param url url
* @return new url * @return new url
* @deprecated
*/ */
public static String encodeIllegalCharacterInUrl(String url) { public static String encodeIllegalCharacterInUrl(String url) {
//TODO more charator support
return url.replace(" ", "%20"); return url.replace(" ", "%20");
} }
/** Matches a run of one or more consecutive '#' characters; compiled once rather than on every call. */
private static final java.util.regex.Pattern CONSECUTIVE_HASHES = java.util.regex.Pattern.compile("#+");

/**
 * Repairs characters that make a url illegal: every space is replaced by {@code %20}
 * and every run of consecutive {@code #} is collapsed into a single {@code #}.
 *
 * @param url url to repair; must not be null
 * @return the repaired url
 * @throws NullPointerException if {@code url} is null
 */
public static String fixIllegalCharacterInUrl(String url) {
    //TODO support more characters
    String spacesEncoded = url.replace(" ", "%20");
    return CONSECUTIVE_HASHES.matcher(spacesEncoded).replaceAll("#");
}
public static String getHost(String url) { public static String getHost(String url) {
String host = url; String host = url;
int i = StringUtils.ordinalIndexOf(url, "/", 3); int i = StringUtils.ordinalIndexOf(url, "/", 3);
......
package us.codecraft.webmagic.downloader;
import org.junit.Test;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.utils.UrlUtils;
import java.net.URI;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Tests how {@link HttpUriRequestConverter} handles a url that contains
 * consecutive '#' characters.
 *
 * @author code4crafter@gmail.com
 * Date: 2017/7/22
 * Time: 5:29 PM
 */
public class HttpUriRequestConverterTest {

    /** Url ending in "##", used as the illegal input in both tests. */
    private static final String ILLEGAL_URL = "http://bj.zhongkao.com/beikao/yimo/##";

    /** The same url with the run of '#' collapsed to a single '#'. */
    private static final String REPAIRED_URL = "http://bj.zhongkao.com/beikao/yimo/#";

    /**
     * The converter passes the request url through
     * {@link UrlUtils#fixIllegalCharacterInUrl(String)} before building the request,
     * so a raw illegal url is expected to be repaired rather than rejected with an
     * {@link IllegalArgumentException}.
     */
    @Test
    public void test_illegal_uri() throws Exception {
        HttpUriRequestConverter converter = new HttpUriRequestConverter();
        HttpClientRequestContext context = converter.convert(new Request(ILLEGAL_URL), Site.me(), null);
        assertThat(context.getHttpUriRequest().getURI()).isEqualTo(new URI(REPAIRED_URL));
    }

    /**
     * A url that was already repaired by the caller is converted to the
     * corresponding legal {@link URI}.
     */
    @Test
    public void test_illegal_uri_correct() throws Exception {
        HttpUriRequestConverter converter = new HttpUriRequestConverter();
        String repairedByCaller = UrlUtils.fixIllegalCharacterInUrl(ILLEGAL_URL);
        HttpClientRequestContext context = converter.convert(new Request(repairedByCaller), Site.me(), null);
        assertThat(context.getHttpUriRequest().getURI()).isEqualTo(new URI(REPAIRED_URL));
    }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment