Adding advanced pattern support to PatternMatcher

This change adds a new pathMatch attribute to the intent-filter tag
that supports a subset of regular expression syntax. Supported
tokens include dot (.) and set ([]) and supported modifiers include
zero-or-more (*), one-or-more(+) and ranges ({}).

Change-Id: I2ec3bc8a9eb3e94d21caf34a1ed0f3fd3fb33a35
Bug: 30805203
This commit is contained in:
Patrick Baumann
2016-08-25 15:56:46 -07:00
parent 02913603bd
commit 2baf095ca9
3 changed files with 609 additions and 8 deletions

View File

@@ -16,6 +16,10 @@
package android.os;
import android.util.Log;
import java.util.Arrays;
/**
* A simple pattern matcher, which is safe to use on untrusted data: it does
* not provide full reg-exp support, only simple globbing that can not be
@@ -44,13 +48,59 @@ public class PatternMatcher implements Parcelable {
* wildcard part of a normal regexp.
*/
public static final int PATTERN_SIMPLE_GLOB = 2;
/**
* Pattern type: the given pattern is interpreted with a regular
* expression-like syntax for matching against the string it is tested
* against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
* with full support for character ranges and the not ({@code ^}) modifier.
* Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
* for one-or-more and full range ({@code {...}}) support. This is a simple
* evaulation implementation in which matching is done against the pattern in
* realtime with no backtracking support.
*
* {@hide} Pending approval for public API
*/
public static final int PATTERN_ADVANCED_GLOB = 3;
// token types for advanced matching
private static final int TOKEN_TYPE_LITERAL = 0;
private static final int TOKEN_TYPE_ANY = 1;
private static final int TOKEN_TYPE_SET = 2;
private static final int TOKEN_TYPE_INVERSE_SET = 3;
// Return for no match
private static final int NO_MATCH = -1;
private static final String TAG = "PatternMatcher";
// Parsed placeholders for advanced patterns
private static final int PARSED_TOKEN_CHAR_SET_START = -1;
private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
private static final int PARSED_TOKEN_CHAR_ANY = -4;
private static final int PARSED_MODIFIER_RANGE_START = -5;
private static final int PARSED_MODIFIER_RANGE_STOP = -6;
private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
private final String mPattern;
private final int mType;
private final int[] mParsedPattern;
private static final int MAX_PATTERN_STORAGE = 2048;
// workspace to use for building a parsed advanced pattern;
private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
public PatternMatcher(String pattern, int type) {
mPattern = pattern;
mType = type;
if (mType == PATTERN_ADVANCED_GLOB) {
mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
} else {
mParsedPattern = null;
}
}
public final String getPath() {
@@ -62,7 +112,7 @@ public class PatternMatcher implements Parcelable {
}
public boolean match(String str) {
return matchPattern(mPattern, str, mType);
return matchPattern(str, mPattern, mParsedPattern, mType);
}
public String toString() {
@@ -77,6 +127,9 @@ public class PatternMatcher implements Parcelable {
case PATTERN_SIMPLE_GLOB:
type = "GLOB: ";
break;
case PATTERN_ADVANCED_GLOB:
type = "ADVANCED: ";
break;
}
return "PatternMatcher{" + type + mPattern + "}";
}
@@ -88,11 +141,13 @@ public class PatternMatcher implements Parcelable {
public void writeToParcel(Parcel dest, int flags) {
dest.writeString(mPattern);
dest.writeInt(mType);
dest.writeIntArray(mParsedPattern);
}
public PatternMatcher(Parcel src) {
mPattern = src.readString();
mType = src.readInt();
mParsedPattern = src.createIntArray();
}
public static final Parcelable.Creator<PatternMatcher> CREATOR
@@ -106,16 +161,21 @@ public class PatternMatcher implements Parcelable {
}
};
static boolean matchPattern(String pattern, String match, int type) {
static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
if (match == null) return false;
if (type == PATTERN_LITERAL) {
return pattern.equals(match);
} if (type == PATTERN_PREFIX) {
return match.startsWith(pattern);
} else if (type != PATTERN_SIMPLE_GLOB) {
return false;
} else if (type == PATTERN_SIMPLE_GLOB) {
return matchGlobPattern(pattern, match);
} else if (type == PATTERN_ADVANCED_GLOB) {
return matchAdvancedPattern(parsedPattern, match);
}
return false;
}
static boolean matchGlobPattern(String pattern, String match) {
final int NP = pattern.length();
if (NP <= 0) {
return match.length() <= 0;
@@ -194,4 +254,310 @@ public class PatternMatcher implements Parcelable {
return false;
}
}
/**
* Parses the advanced pattern and returns an integer array representation of it. The integer
* array treats each field as a character if positive and a unique token placeholder if
* negative. This method will throw on any pattern structure violations.
*/
synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
int ip = 0;
final int LP = pattern.length();
int it = 0;
boolean inSet = false;
boolean inRange = false;
boolean inCharClass = false;
boolean addToParsedPattern;
while (ip < LP) {
if (it > MAX_PATTERN_STORAGE - 3) {
throw new IllegalArgumentException("Pattern is too large!");
}
char c = pattern.charAt(ip);
addToParsedPattern = false;
switch (c) {
case '[':
if (inSet) {
addToParsedPattern = true; // treat as literal or char class in set
} else {
if (pattern.charAt(ip + 1) == '^') {
sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
ip++; // skip over the '^'
} else {
sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
}
ip++; // move to the next pattern char
inSet = true;
continue;
}
break;
case ']':
if (!inSet) {
addToParsedPattern = true; // treat as literal outside of set
} else {
int parsedToken = sParsedPatternScratch[it - 1];
if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
throw new IllegalArgumentException(
"You must define characters in a set.");
}
sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
inSet = false;
inCharClass = false;
}
break;
case '{':
if (!inSet) {
if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
throw new IllegalArgumentException("Modifier must follow a token.");
}
sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
ip++;
inRange = true;
}
break;
case '}':
if (inRange) { // only terminate the range if we're currently in one
sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
inRange = false;
}
break;
case '*':
if (!inSet) {
if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
throw new IllegalArgumentException("Modifier must follow a token.");
}
sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
}
break;
case '+':
if (!inSet) {
if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
throw new IllegalArgumentException("Modifier must follow a token.");
}
sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
}
break;
case '.':
if (!inSet) {
sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
}
break;
case '\\': // escape
if (ip + 1 >= LP) {
throw new IllegalArgumentException("Escape found at end of pattern!");
}
c = pattern.charAt(++ip);
addToParsedPattern = true;
break;
default:
addToParsedPattern = true;
break;
}
if (inSet) {
if (inCharClass) {
sParsedPatternScratch[it++] = c;
inCharClass = false;
} else {
// look forward for character class
if (ip + 2 < LP
&& pattern.charAt(ip + 1) == '-'
&& pattern.charAt(ip + 2) != ']') {
inCharClass = true;
sParsedPatternScratch[it++] = c; // set first token as lower end of range
ip++; // advance past dash
} else { // literal
sParsedPatternScratch[it++] = c; // set first token as literal
sParsedPatternScratch[it++] = c; // set second set as literal
}
}
} else if (inRange) {
int endOfSet = pattern.indexOf('}', ip);
if (endOfSet < 0) {
throw new IllegalArgumentException("Range not ended with '}'");
}
String rangeString = pattern.substring(ip, endOfSet);
int commaIndex = rangeString.indexOf(',');
try {
final int rangeMin;
final int rangeMax;
if (commaIndex < 0) {
int parsedRange = Integer.parseInt(rangeString);
rangeMin = rangeMax = parsedRange;
} else {
rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
rangeMax = Integer.MAX_VALUE;
} else {
rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
}
}
if (rangeMin > rangeMax) {
throw new IllegalArgumentException(
"Range quantifier minimum is greater than maximum");
}
sParsedPatternScratch[it++] = rangeMin;
sParsedPatternScratch[it++] = rangeMax;
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Range number format incorrect", e);
}
ip = endOfSet;
continue; // don't increment ip
} else if (addToParsedPattern) {
sParsedPatternScratch[it++] = c;
}
ip++;
}
if (inSet) {
throw new IllegalArgumentException("Set was not terminated!");
}
return Arrays.copyOf(sParsedPatternScratch, it);
}
private static boolean isParsedModifier(int parsedChar) {
return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
parsedChar == PARSED_MODIFIER_RANGE_STOP ||
parsedChar == PARSED_MODIFIER_RANGE_START;
}
static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
// create indexes
int ip = 0, im = 0;
// one-time length check
final int LP = parsedPattern.length, LM = match.length();
// The current character being analyzed in the pattern
int patternChar;
int tokenType;
int charSetStart = 0, charSetEnd = 0;
while (ip < LP) { // we still have content in the pattern
patternChar = parsedPattern[ip];
// get the match type of the next verb
switch (patternChar) {
case PARSED_TOKEN_CHAR_ANY:
tokenType = TOKEN_TYPE_ANY;
ip++;
break;
case PARSED_TOKEN_CHAR_SET_START:
case PARSED_TOKEN_CHAR_SET_INVERSE_START:
tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
? TOKEN_TYPE_SET
: TOKEN_TYPE_INVERSE_SET;
charSetStart = ip + 1; // start from the char after the set start
while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
charSetEnd = ip - 1; // we're on the set stop, end is the previous
ip++; // move the pointer to the next pattern entry
break;
default:
charSetStart = ip;
tokenType = TOKEN_TYPE_LITERAL;
ip++;
break;
}
final int minRepetition;
final int maxRepetition;
// look for a match length modifier
if (ip >= LP) {
minRepetition = maxRepetition = 1;
} else {
patternChar = parsedPattern[ip];
switch (patternChar) {
case PARSED_MODIFIER_ZERO_OR_MORE:
minRepetition = 0;
maxRepetition = Integer.MAX_VALUE;
ip++;
break;
case PARSED_MODIFIER_ONE_OR_MORE:
minRepetition = 1;
maxRepetition = Integer.MAX_VALUE;
ip++;
break;
case PARSED_MODIFIER_RANGE_START:
minRepetition = parsedPattern[++ip];
maxRepetition = parsedPattern[++ip];
ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
break;
default:
minRepetition = maxRepetition = 1; // implied literal
break;
}
}
if (minRepetition > maxRepetition) {
return false;
}
// attempt to match as many characters as possible
int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
parsedPattern, charSetStart, charSetEnd);
// if we found a conflict, return false immediately
if (matched == NO_MATCH) {
return false;
}
// move the match pointer the number of characters matched
im += matched;
}
return ip >= LP && im >= LM; // have parsed entire string and regex
}
private static int matchChars(String match, int im, final int lm, int tokenType,
int minRepetition, int maxRepetition, int[] parsedPattern,
int tokenStart, int tokenEnd) {
int matched = 0;
while(matched < maxRepetition
&& matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
tokenEnd)) {
matched++;
}
return matched < minRepetition ? NO_MATCH : matched;
}
private static boolean matchChar(String match, int im, final int lm, int tokenType,
int[] parsedPattern, int tokenStart, int tokenEnd) {
if (im >= lm) { // we've overrun the string, no match
return false;
}
switch (tokenType) {
case TOKEN_TYPE_ANY:
return true;
case TOKEN_TYPE_SET:
for (int i = tokenStart; i < tokenEnd; i += 2) {
char matchChar = match.charAt(im);
if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
return true;
}
}
return false;
case TOKEN_TYPE_INVERSE_SET:
for (int i = tokenStart; i < tokenEnd; i += 2) {
char matchChar = match.charAt(im);
if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
return false;
}
}
return true;
case TOKEN_TYPE_LITERAL:
return match.charAt(im) == parsedPattern[tokenStart];
default:
return false;
}
}
}

View File

@@ -32,6 +32,7 @@ public class OsTests {
suite.addTestSuite(IdleHandlerTest.class);
suite.addTestSuite(MessageQueueTest.class);
suite.addTestSuite(MessengerTest.class);
suite.addTestSuite(PatternMatcherTest.class);
suite.addTestSuite(SystemPropertiesTest.class);
return suite;

View File

@@ -0,0 +1,234 @@
package android.os;
import android.test.suitebuilder.annotation.SmallTest;
import junit.framework.TestCase;
import org.junit.runner.RunWith;
import org.junit.Test;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
@SmallTest
public class PatternMatcherTest extends TestCase{
@Test
public void testAdvancedPatternMatchesAnyToken() {
PatternMatcher matcher = new PatternMatcher(".", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a", matcher);
assertMatches("b", matcher);
assertNotMatches("", matcher);
}
@Test
public void testAdvancedPatternMatchesSetToken() {
PatternMatcher matcher = new PatternMatcher("[a]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a", matcher);
assertNotMatches("b", matcher);
matcher = new PatternMatcher("[.*+{}\\]\\\\[]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches(".", matcher);
assertMatches("*", matcher);
assertMatches("+", matcher);
assertMatches("{", matcher);
assertMatches("}", matcher);
assertMatches("]", matcher);
assertMatches("\\", matcher);
assertMatches("[", matcher);
}
@Test
public void testAdvancedPatternMatchesSetCharacterClassToken() {
PatternMatcher matcher = new PatternMatcher("[a-z]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a", matcher);
assertMatches("b", matcher);
assertNotMatches("A", matcher);
assertNotMatches("1", matcher);
matcher = new PatternMatcher("[a-z][0-9]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a1", matcher);
assertNotMatches("1a", matcher);
assertNotMatches("aa", matcher);
matcher = new PatternMatcher("[z-a]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("a", matcher);
assertNotMatches("z", matcher);
assertNotMatches("A", matcher);
matcher = new PatternMatcher("[^0-9]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a", matcher);
assertMatches("z", matcher);
assertMatches("A", matcher);
assertNotMatches("9", matcher);
assertNotMatches("5", matcher);
assertNotMatches("0", matcher);
assertPoorlyFormattedPattern("[]a]");
matcher = new PatternMatcher("[\\[a]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a", matcher);
assertMatches("[", matcher);
}
@Test
public void testAdvancedPatternMatchesEscapedCharacters() {
PatternMatcher matcher = new PatternMatcher("\\.", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches(".", matcher);
assertNotMatches("a", matcher);
assertNotMatches("1", matcher);
matcher = new PatternMatcher("a\\+", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a+", matcher);
assertNotMatches("a", matcher);
assertNotMatches("aaaaa", matcher);
matcher = new PatternMatcher("[\\a-\\z]", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("a", matcher);
assertMatches("z", matcher);
assertNotMatches("A", matcher);
}
@Test
public void testAdvancedPatternMatchesLiteralTokens() {
PatternMatcher matcher = new PatternMatcher("a", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("a", matcher);
assertNotMatches("z", matcher);
matcher = new PatternMatcher("az", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("az", matcher);
assertNotMatches("za", matcher);
}
@Test
public void testAdvancedPatternMatchesSetZeroOrMore() {
PatternMatcher matcher = new PatternMatcher("[a-z]*", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("", matcher);
assertMatches("a", matcher);
assertMatches("abcdefg", matcher);
assertNotMatches("abc1", matcher);
assertNotMatches("1abc", matcher);
}
@Test
public void testAdvancedPatternMatchesSetOneOrMore() {
PatternMatcher matcher = new PatternMatcher("[a-z]+", PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("a", matcher);
assertMatches("abcdefg", matcher);
assertNotMatches("abc1", matcher);
assertNotMatches("1abc", matcher);
}
@Test
public void testAdvancedPatternMatchesSingleRange() {
PatternMatcher matcher = new PatternMatcher("[a-z]{1}",
PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("a", matcher);
assertMatches("z", matcher);
assertNotMatches("1", matcher);
assertNotMatches("aa", matcher);
}
@Test
public void testAdvancedPatternMatchesFullRange() {
PatternMatcher matcher = new PatternMatcher("[a-z]{1,5}",
PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("a", matcher);
assertMatches("zazaz", matcher);
assertNotMatches("azazaz", matcher);
assertNotMatches("11111", matcher);
}
@Test
public void testAdvancedPatternMatchesPartialRange() {
PatternMatcher matcher = new PatternMatcher("[a-z]{3,}",
PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("aza", matcher);
assertMatches("zazaz", matcher);
assertMatches("azazazazazaz", matcher);
assertNotMatches("aa", matcher);
}
@Test
public void testAdvancedPatternMatchesComplexPatterns() {
PatternMatcher matcher = new PatternMatcher(
"/[0-9]{4}/[0-9]{2}/[0-9]{2}/[a-zA-Z0-9_]+\\.html",
PatternMatcher.PATTERN_ADVANCED_GLOB);
assertNotMatches("", matcher);
assertMatches("/2016/09/07/got_this_working.html", matcher);
assertMatches("/2016/09/07/got_this_working2.html", matcher);
assertNotMatches("/2016/09/07/got_this_working2dothtml", matcher);
assertNotMatches("/2016/9/7/got_this_working.html", matcher);
matcher = new PatternMatcher(
"/b*a*bar.*",
PatternMatcher.PATTERN_ADVANCED_GLOB);
assertMatches("/babar", matcher);
assertMatches("/babarfff", matcher);
assertMatches("/bbaabarfff", matcher);
assertMatches("/babar?blah", matcher);
assertMatches("/baaaabar?blah", matcher);
assertNotMatches("?bar", matcher);
assertNotMatches("/bar", matcher);
assertNotMatches("/baz", matcher);
assertNotMatches("/ba/bar", matcher);
assertNotMatches("/barf", matcher);
assertNotMatches("/", matcher);
assertNotMatches("?blah", matcher);
}
@Test
public void testAdvancedPatternPoorFormatThrowsIllegalArgumentException() {
assertPoorlyFormattedPattern("[a-z");
assertPoorlyFormattedPattern("a{,4}");
assertPoorlyFormattedPattern("a{0,a}");
assertPoorlyFormattedPattern("a{\\1, 2}");
assertPoorlyFormattedPattern("[]");
assertPoorlyFormattedPattern("a{}");
assertPoorlyFormattedPattern("{3,4}");
assertPoorlyFormattedPattern("a+{3,4}");
assertPoorlyFormattedPattern("*.");
assertPoorlyFormattedPattern(".+*");
assertPoorlyFormattedPattern("a{3,4");
assertPoorlyFormattedPattern("[a");
assertPoorlyFormattedPattern("abc\\");
assertPoorlyFormattedPattern("+.");
StringBuilder charSet = new StringBuilder("[");
for (int i = 0; i < 1024; i++) {
charSet.append('a' + (i % 26));
}
charSet.append("]");
assertPoorlyFormattedPattern(charSet.toString());
}
private void assertMatches(String string, PatternMatcher matcher) {
assertTrue("'" + string + "' should match '" + matcher.toString() + "'",
matcher.match(string));
}
private void assertNotMatches(String string, PatternMatcher matcher) {
assertTrue("'" + string + "' should not match '" + matcher.toString() + "'",
!matcher.match(string));
}
private void assertPoorlyFormattedPattern(String format) {
try {
new PatternMatcher(format, PatternMatcher.PATTERN_ADVANCED_GLOB);
} catch (IllegalArgumentException e) {
return;// expected
}
fail("'" + format + "' was erroneously created");
}
}