Merge "Exclude unicode space characters from autoLink URL patterns" into nyc-dev

This commit is contained in:
Siyamed Sinir
2016-04-12 00:35:27 +00:00
committed by Android (Google) Code Review
2 changed files with 33 additions and 2 deletions

View File

@@ -251,7 +251,7 @@ public class Patterns {
+ "|[1-9][0-9]|[0-9]))");
/**
* Valid UCS characters defined in RFC 3987.
* Valid UCS characters defined in RFC 3987. Excludes space characters.
*/
private static final String UCS_CHAR =
"\u00A0-\uD7FF" +
@@ -270,7 +270,8 @@ public class Patterns {
"\uDA80\uDC00-\uDABF\uDFFD" +
"\uDAC0\uDC00-\uDAFF\uDFFD" +
"\uDB00\uDC00-\uDB3F\uDFFD" +
"\uDB44\uDC00-\uDB7F\uDFFD";
"\uDB44\uDC00-\uDB7F\uDFFD" +
"&&[^\u00A0[\u2000-\u200A]\u2028\u2029\u202F\u3000]";
/**
* Valid characters for IRI label defined in RFC 3987.

View File

@@ -419,6 +419,36 @@ public class PatternsTest extends TestCase {
Patterns.AUTOLINK_WEB_URL.matcher(url).matches());
}
@SmallTest
public void testAutoLinkWebUrl_doesNotMatchUnicodeSpaces() throws Exception {
    // Each entry is spliced between "http://and" and "roid"; the resulting string
    // must not be accepted as a single URL by AUTOLINK_WEB_URL.
    final String[] unicodeSpaces = {
        "\u00A0", // no-break space
        "\u2000", // en quad
        "\u2001", // em quad
        "\u2002", // en space
        "\u2003", // em space
        "\u2004", // three-per-em space
        "\u2005", // four-per-em space
        "\u2006", // six-per-em space
        "\u2007", // figure space
        "\u2008", // punctuation space
        "\u2009", // thin space
        "\u200A", // hair space
        "\u2028", // line separator
        "\u2029", // paragraph separator
        "\u202F", // narrow no-break space
        "\u3000" // ideographic space
    };
    final String prefix = "http://and";
    final String suffix = "roid";

    for (int i = 0; i < unicodeSpaces.length; i++) {
        final String space = unicodeSpaces[i];
        final String candidate = prefix + space + suffix;
        // The failure message reports the code point of the offending character.
        final String failure = "Should not match empty space - code:" + space.codePointAt(0);
        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(candidate).matches();
        assertFalse(failure, matched);
    }
}
// Tests for Patterns.IP_ADDRESS
@SmallTest