Commit 2b46b11e authored by Almark Ming, committed by yihua.huang

Update RegexSelector.java

Optimize regex format check

Conflicts:
	webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java
parent d9a15ad6
...@@ -26,12 +26,12 @@ public class RegexSelector implements Selector { ...@@ -26,12 +26,12 @@ public class RegexSelector implements Selector {
if (StringUtils.isBlank(regexStr)) { if (StringUtils.isBlank(regexStr)) {
throw new IllegalArgumentException("regex must not be empty"); throw new IllegalArgumentException("regex must not be empty");
} }
if (!StringUtils.contains(regexStr, "(") && !StringUtils.contains(regexStr, ")")) { // Check bracket for regex group. Add default group 1 if there is no group.
// Only check whether an unescaped, capturing left parenthesis exists; leave full regex validation to Pattern.
if (StringUtils.countMatches(regexStr, "(") - StringUtils.countMatches(regexStr, "\\(") ==
StringUtils.countMatches(regexStr, "(?:") - StringUtils.countMatches(regexStr, "\\(?:")) {
regexStr = "(" + regexStr + ")"; regexStr = "(" + regexStr + ")";
} }
if (!StringUtils.contains(regexStr, "(") || !StringUtils.contains(regexStr, ")")) {
throw new IllegalArgumentException("regex must have capture group 1");
}
this.regexStr = regexStr; this.regexStr = regexStr;
try { try {
regex = Pattern.compile(regexStr, Pattern.DOTALL | Pattern.CASE_INSENSITIVE); regex = Pattern.compile(regexStr, Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
......
package us.codecraft.webmagic.selector; package us.codecraft.webmagic.selector;
import junit.framework.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
/** /**
...@@ -8,14 +8,18 @@ import org.junit.Test; ...@@ -8,14 +8,18 @@ import org.junit.Test;
*/ */
public class RegexSelectorTest { public class RegexSelectorTest {
@Test @Test(expected = IllegalArgumentException.class)
public void testInvalidRegex() { public void testRegexWithSingleLeftBracket() {
String regex = "\\d+("; String regex = "\\d+(";
try { new RegexSelector(regex);
new RegexSelector(regex); }
Assert.assertNotNull(regex);
} catch (Exception e) {
} @Test
public void testRegexWithLeftBracketQuoted() {
String regex = "\\(.+";
String source = "(hello world";
RegexSelector regexSelector = new RegexSelector(regex);
String select = regexSelector.select(source);
Assert.assertEquals(source,select);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment