Merge "Improved Field Classification edit distance algorithms." into pi-dev

This commit is contained in:
TreeHugger Robot
2018-03-15 20:52:09 +00:00
committed by Android (Google) Code Review
4 changed files with 245 additions and 79 deletions

View File

@@ -15,6 +15,8 @@
*/
package android.ext.services.autofill;
import static android.ext.services.autofill.EditDistanceScorer.DEFAULT_ALGORITHM;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.os.Bundle;
@@ -29,8 +31,6 @@ import java.util.List;
public class AutofillFieldClassificationServiceImpl extends AutofillFieldClassificationService {
private static final String TAG = "AutofillFieldClassificationServiceImpl";
// TODO(b/70291841): set to false before launching
private static final boolean DEBUG = true;
@Nullable
@Override
@@ -40,30 +40,13 @@ public class AutofillFieldClassificationServiceImpl extends AutofillFieldClassif
if (ArrayUtils.isEmpty(actualValues) || ArrayUtils.isEmpty(userDataValues)) {
Log.w(TAG, "getScores(): empty currentvalues (" + actualValues + ") or userValues ("
+ userDataValues + ")");
// TODO(b/70939974): add unit test
return null;
}
if (algorithmName != null && !algorithmName.equals(EditDistanceScorer.NAME)) {
if (algorithmName != null && !algorithmName.equals(DEFAULT_ALGORITHM)) {
Log.w(TAG, "Ignoring invalid algorithm (" + algorithmName + ") and using "
+ EditDistanceScorer.NAME + " instead");
+ DEFAULT_ALGORITHM + " instead");
}
final String actualAlgorithmName = EditDistanceScorer.NAME;
final int actualValuesSize = actualValues.size();
final int userDataValuesSize = userDataValues.size();
if (DEBUG) {
Log.d(TAG, "getScores() will return a " + actualValuesSize + "x"
+ userDataValuesSize + " matrix for " + actualAlgorithmName);
}
final float[][] scores = new float[actualValuesSize][userDataValuesSize];
final EditDistanceScorer algorithm = EditDistanceScorer.getInstance();
for (int i = 0; i < actualValuesSize; i++) {
for (int j = 0; j < userDataValuesSize; j++) {
final float score = algorithm.getScore(actualValues.get(i), userDataValues.get(j));
scores[i][j] = score;
}
}
return scores;
return EditDistanceScorer.getScores(actualValues, userDataValues);
}
}

View File

@@ -16,53 +16,133 @@
package android.ext.services.autofill;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.util.Log;
import android.view.autofill.AutofillValue;
/**
* Helper used to calculate the classification score between an actual {@link AutofillValue} filled
* by the user and the expected value predicted by an autofill service.
*/
// TODO(b/70291841): explain algorithm once it's fully implemented
import com.android.internal.annotations.VisibleForTesting;
import java.util.List;
final class EditDistanceScorer {
private static final EditDistanceScorer sInstance = new EditDistanceScorer();
private static final String TAG = "EditDistanceScorer";
public static final String NAME = "EDIT_DISTANCE";
// TODO(b/70291841): STOPSHIP - set to false before launching
private static final boolean DEBUG = true;
static final String DEFAULT_ALGORITHM = "EDIT_DISTANCE";
/**
* Gets the singleton instance.
*/
public static EditDistanceScorer getInstance() {
return sInstance;
}
private EditDistanceScorer() {
}
/**
* Returns the classification score between an actual {@link AutofillValue} filled
* by the user and the expected value predicted by an autofill service.
*
* <p>A full-match is {@code 1.0} (representing 100%), a full mismatch is {@code 0.0} and
* partial mathces are something in between, typically using edit-distance algorithms.
* Gets the field classification score of 2 values based on the edit distance between them.
*
* <p>The score is defined as: @(max_length - edit_distance) / max_length
*/
public float getScore(@NonNull AutofillValue actualValue, @NonNull String userDataValue) {
@VisibleForTesting
static float getScore(@Nullable AutofillValue actualValue, @Nullable String userDataValue) {
if (actualValue == null || !actualValue.isText() || userDataValue == null) return 0;
// TODO(b/70291841): implement edit distance - currently it's returning either 0, 100%, or
// partial match when number of chars match
final String textValue = actualValue.getTextValue().toString();
final int total = textValue.length();
if (total != userDataValue.length()) return 0F;
int matches = 0;
for (int i = 0; i < total; i++) {
if (Character.toLowerCase(textValue.charAt(i)) == Character
.toLowerCase(userDataValue.charAt(i))) {
matches++;
final String actualValueText = actualValue.getTextValue().toString();
final int actualValueLength = actualValueText.length();
final int userDatalength = userDataValue.length();
if (userDatalength == 0) {
return (actualValueLength == 0) ? 1 : 0;
}
final int distance = editDistance(actualValueText.toLowerCase(),
userDataValue.toLowerCase());
final int maxLength = Math.max(actualValueLength, userDatalength);
return ((float) maxLength - distance) / maxLength;
}
/**
* Computes the edit distance (number of insertions, deletions or substitutions to edit one
* string into the other) between two strings. In particular, this will compute the Levenshtein
* distance.
*
* <p>See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
*
* @param s the first string to compare
* @param t the second string to compare
* @return the edit distance between the two strings
*/
// Note: copied verbatim from com.android.tools.lint.detector.api.LintUtils.java
public static int editDistance(@NonNull String s, @NonNull String t) {
return editDistance(s, t, Integer.MAX_VALUE);
}
/**
* Computes the edit distance (number of insertions, deletions or substitutions to edit one
* string into the other) between two strings. In particular, this will compute the Levenshtein
* distance.
*
* <p>See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
*
* @param s the first string to compare
* @param t the second string to compare
* @param max the maximum edit distance that we care about; if for example the string length
* delta is greater than this we don't bother computing the exact edit distance since the
* caller has indicated they're not interested in the result
* @return the edit distance between the two strings, or some other value greater than that if
* the edit distance is at least as big as the {@code max} parameter
*/
// Note: copied verbatim from com.android.tools.lint.detector.api.LintUtils.java
private static int editDistance(@NonNull String s, @NonNull String t, int max) {
if (s.equals(t)) {
return 0;
}
if (Math.abs(s.length() - t.length()) > max) {
// The string lengths differ more than the allowed edit distance;
// no point in even attempting to compute the edit distance (requires
// O(n*m) storage and O(n*m) speed, where n and m are the string lengths)
return Integer.MAX_VALUE;
}
int m = s.length();
int n = t.length();
int[][] d = new int[m + 1][n + 1];
for (int i = 0; i <= m; i++) {
d[i][0] = i;
}
for (int j = 0; j <= n; j++) {
d[0][j] = j;
}
for (int j = 1; j <= n; j++) {
for (int i = 1; i <= m; i++) {
if (s.charAt(i - 1) == t.charAt(j - 1)) {
d[i][j] = d[i - 1][j - 1];
} else {
int deletion = d[i - 1][j] + 1;
int insertion = d[i][j - 1] + 1;
int substitution = d[i - 1][j - 1] + 1;
d[i][j] = Math.min(deletion, Math.min(insertion, substitution));
}
}
}
return ((float) matches) / total;
return d[m][n];
}
/**
* Gets the scores in a batch.
*/
static float[][] getScores(@NonNull List<AutofillValue> actualValues,
@NonNull List<String> userDataValues) {
final int actualValuesSize = actualValues.size();
final int userDataValuesSize = userDataValues.size();
if (DEBUG) {
Log.d(TAG, "getScores() will return a " + actualValuesSize + "x"
+ userDataValuesSize + " matrix for " + DEFAULT_ALGORITHM);
}
final float[][] scores = new float[actualValuesSize][userDataValuesSize];
for (int i = 0; i < actualValuesSize; i++) {
for (int j = 0; j < userDataValuesSize; j++) {
final float score = getScore(actualValues.get(i), userDataValues.get(j));
scores[i][j] = score;
}
}
return scores;
}
}

View File

@@ -0,0 +1,59 @@
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.ext.services.autofill;
import org.junit.Test;
import java.util.Arrays;
import java.util.Collections;
import static com.google.common.truth.Truth.assertThat;
import android.view.autofill.AutofillValue;
/**
* Contains the base tests that does not rely on the specific algorithm implementation.
*/
public class AutofillFieldClassificationServiceImplTest {
private final AutofillFieldClassificationServiceImpl mService =
new AutofillFieldClassificationServiceImpl();
@Test
public void testOnGetScores_nullActualValues() {
assertThat(mService.onGetScores(null, null, null, Arrays.asList("whatever"))).isNull();
}
@Test
public void testOnGetScores_emptyActualValues() {
assertThat(mService.onGetScores(null, null, Collections.emptyList(),
Arrays.asList("whatever"))).isNull();
}
@Test
public void testOnGetScores_nullUserDataValues() {
assertThat(mService.onGetScores(null, null,
Arrays.asList(AutofillValue.forText("whatever")), null)).isNull();
}
@Test
public void testOnGetScores_emptyUserDataValues() {
assertThat(mService.onGetScores(null, null,
Arrays.asList(AutofillValue.forText("whatever")), Collections.emptyList()))
.isNull();
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017 The Android Open Source Project
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,65 +13,109 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.ext.services.autofill;
import static com.google.common.truth.Truth.assertThat;
import static android.ext.services.autofill.EditDistanceScorer.getScore;
import static android.ext.services.autofill.EditDistanceScorer.getScores;
import static android.view.autofill.AutofillValue.forText;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;
import android.support.test.runner.AndroidJUnit4;
import android.view.autofill.AutofillValue;
import org.junit.Test;
import org.junit.runner.RunWith;
@RunWith(AndroidJUnit4.class)
import java.util.Arrays;
import java.util.List;
public class EditDistanceScorerTest {
private final EditDistanceScorer mScorer = EditDistanceScorer.getInstance();
@Test
public void testGetScore_nullValue() {
assertFloat(mScorer.getScore(null, "D'OH!"), 0);
assertFloat(getScore(null, "D'OH!"), 0);
}
@Test
public void testGetScore_nonTextValue() {
assertFloat(mScorer.getScore(AutofillValue.forToggle(true), "D'OH!"), 0);
assertFloat(getScore(AutofillValue.forToggle(true), "D'OH!"), 0);
}
@Test
public void testGetScore_nullUserData() {
assertFloat(mScorer.getScore(AutofillValue.forText("D'OH!"), null), 0);
assertFloat(getScore(AutofillValue.forText("D'OH!"), null), 0);
}
@Test
public void testGetScore_fullMatch() {
assertFloat(mScorer.getScore(AutofillValue.forText("D'OH!"), "D'OH!"), 1);
assertFloat(getScore(AutofillValue.forText("D'OH!"), "D'OH!"), 1);
assertFloat(getScore(AutofillValue.forText(""), ""), 1);
}
@Test
public void testGetScore_fullMatchMixedCase() {
assertFloat(mScorer.getScore(AutofillValue.forText("D'OH!"), "D'oH!"), 1);
assertFloat(getScore(AutofillValue.forText("D'OH!"), "D'oH!"), 1);
}
// TODO(b/70291841): might need to change it once it supports different sizes
@Test
public void testGetScore_mismatchDifferentSizes() {
assertFloat(mScorer.getScore(AutofillValue.forText("One"), "MoreThanOne"), 0);
assertFloat(mScorer.getScore(AutofillValue.forText("MoreThanOne"), "One"), 0);
assertFloat(getScore(AutofillValue.forText("X"), "Xy"), 0.50F);
assertFloat(getScore(AutofillValue.forText("Xy"), "X"), 0.50F);
assertFloat(getScore(AutofillValue.forText("One"), "MoreThanOne"), 0.27F);
assertFloat(getScore(AutofillValue.forText("MoreThanOne"), "One"), 0.27F);
assertFloat(getScore(AutofillValue.forText("1600 Amphitheatre Parkway"),
"1600 Amphitheatre Pkwy"), 0.88F);
assertFloat(getScore(AutofillValue.forText("1600 Amphitheatre Pkwy"),
"1600 Amphitheatre Parkway"), 0.88F);
}
@Test
public void testGetScore_partialMatch() {
assertFloat(mScorer.getScore(AutofillValue.forText("Dude"), "Dxxx"), 0.25F);
assertFloat(mScorer.getScore(AutofillValue.forText("Dude"), "DUxx"), 0.50F);
assertFloat(mScorer.getScore(AutofillValue.forText("Dude"), "DUDx"), 0.75F);
assertFloat(mScorer.getScore(AutofillValue.forText("Dxxx"), "Dude"), 0.25F);
assertFloat(mScorer.getScore(AutofillValue.forText("DUxx"), "Dude"), 0.50F);
assertFloat(mScorer.getScore(AutofillValue.forText("DUDx"), "Dude"), 0.75F);
assertFloat(getScore(AutofillValue.forText("Dude"), "Dxxx"), 0.25F);
assertFloat(getScore(AutofillValue.forText("Dude"), "DUxx"), 0.50F);
assertFloat(getScore(AutofillValue.forText("Dude"), "DUDx"), 0.75F);
assertFloat(getScore(AutofillValue.forText("Dxxx"), "Dude"), 0.25F);
assertFloat(getScore(AutofillValue.forText("DUxx"), "Dude"), 0.50F);
assertFloat(getScore(AutofillValue.forText("DUDx"), "Dude"), 0.75F);
}
@Test
public void testGetScores() {
final List<AutofillValue> actualValues = Arrays.asList(forText("A"), forText("b"));
final List<String> userDataValues = Arrays.asList("a", "B", "ab", "c");
final float[][] expectedScores = new float[][] {
new float[] { 1F, 0F, 0.5F, 0F },
new float[] { 0F, 1F, 0.5F, 0F }
};
final float[][] actualScores = getScores(actualValues, userDataValues);
// Unfortunately, Truth does not have an easy way to compare float matrices and show useful
// messages in case of error, so we need to check.
assertWithMessage("actual=%s, expected=%s", toString(actualScores),
toString(expectedScores)).that(actualScores.length).isEqualTo(2);
assertWithMessage("actual=%s, expected=%s", toString(actualScores),
toString(expectedScores)).that(actualScores[0].length).isEqualTo(4);
assertWithMessage("actual=%s, expected=%s", toString(actualScores),
toString(expectedScores)).that(actualScores[1].length).isEqualTo(4);
for (int i = 0; i < actualScores.length; i++) {
final float[] line = actualScores[i];
for (int j = 0; j < line.length; j++) {
float cell = line[j];
assertWithMessage("wrong score at [%s, %s]", i, j).that(cell).isWithin(0.01F)
.of(expectedScores[i][j]);
}
}
}
public static void assertFloat(float actualValue, float expectedValue) {
assertThat(actualValue).isWithin(1.0e-10f).of(expectedValue);
assertThat(actualValue).isWithin(0.01F).of(expectedValue);
}
public static String toString(float[][] matrix) {
final StringBuilder string = new StringBuilder("[ ");
for (int i = 0; i < matrix.length; i++) {
string.append(Arrays.toString(matrix[i])).append(" ");
}
return string.append(" ]").toString();
}
}