Merge "Improved Field Classification edit distance algorithms." into pi-dev
This commit is contained in:
committed by
Android (Google) Code Review
commit
a46e4d1059
@@ -15,6 +15,8 @@
|
||||
*/
|
||||
package android.ext.services.autofill;
|
||||
|
||||
import static android.ext.services.autofill.EditDistanceScorer.DEFAULT_ALGORITHM;
|
||||
|
||||
import android.annotation.NonNull;
|
||||
import android.annotation.Nullable;
|
||||
import android.os.Bundle;
|
||||
@@ -29,8 +31,6 @@ import java.util.List;
|
||||
public class AutofillFieldClassificationServiceImpl extends AutofillFieldClassificationService {
|
||||
|
||||
private static final String TAG = "AutofillFieldClassificationServiceImpl";
|
||||
// TODO(b/70291841): set to false before launching
|
||||
private static final boolean DEBUG = true;
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
@@ -40,30 +40,13 @@ public class AutofillFieldClassificationServiceImpl extends AutofillFieldClassif
|
||||
if (ArrayUtils.isEmpty(actualValues) || ArrayUtils.isEmpty(userDataValues)) {
|
||||
Log.w(TAG, "getScores(): empty currentvalues (" + actualValues + ") or userValues ("
|
||||
+ userDataValues + ")");
|
||||
// TODO(b/70939974): add unit test
|
||||
return null;
|
||||
}
|
||||
if (algorithmName != null && !algorithmName.equals(EditDistanceScorer.NAME)) {
|
||||
if (algorithmName != null && !algorithmName.equals(DEFAULT_ALGORITHM)) {
|
||||
Log.w(TAG, "Ignoring invalid algorithm (" + algorithmName + ") and using "
|
||||
+ EditDistanceScorer.NAME + " instead");
|
||||
+ DEFAULT_ALGORITHM + " instead");
|
||||
}
|
||||
|
||||
final String actualAlgorithmName = EditDistanceScorer.NAME;
|
||||
final int actualValuesSize = actualValues.size();
|
||||
final int userDataValuesSize = userDataValues.size();
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "getScores() will return a " + actualValuesSize + "x"
|
||||
+ userDataValuesSize + " matrix for " + actualAlgorithmName);
|
||||
}
|
||||
final float[][] scores = new float[actualValuesSize][userDataValuesSize];
|
||||
|
||||
final EditDistanceScorer algorithm = EditDistanceScorer.getInstance();
|
||||
for (int i = 0; i < actualValuesSize; i++) {
|
||||
for (int j = 0; j < userDataValuesSize; j++) {
|
||||
final float score = algorithm.getScore(actualValues.get(i), userDataValues.get(j));
|
||||
scores[i][j] = score;
|
||||
}
|
||||
}
|
||||
return scores;
|
||||
return EditDistanceScorer.getScores(actualValues, userDataValues);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,53 +16,133 @@
|
||||
package android.ext.services.autofill;
|
||||
|
||||
import android.annotation.NonNull;
|
||||
import android.annotation.Nullable;
|
||||
import android.util.Log;
|
||||
import android.view.autofill.AutofillValue;
|
||||
|
||||
/**
|
||||
* Helper used to calculate the classification score between an actual {@link AutofillValue} filled
|
||||
* by the user and the expected value predicted by an autofill service.
|
||||
*/
|
||||
// TODO(b/70291841): explain algorithm once it's fully implemented
|
||||
import com.android.internal.annotations.VisibleForTesting;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
final class EditDistanceScorer {
|
||||
|
||||
private static final EditDistanceScorer sInstance = new EditDistanceScorer();
|
||||
private static final String TAG = "EditDistanceScorer";
|
||||
|
||||
public static final String NAME = "EDIT_DISTANCE";
|
||||
// TODO(b/70291841): STOPSHIP - set to false before launching
|
||||
private static final boolean DEBUG = true;
|
||||
|
||||
static final String DEFAULT_ALGORITHM = "EDIT_DISTANCE";
|
||||
|
||||
/**
|
||||
* Gets the singleton instance.
|
||||
*/
|
||||
public static EditDistanceScorer getInstance() {
|
||||
return sInstance;
|
||||
}
|
||||
|
||||
private EditDistanceScorer() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the classification score between an actual {@link AutofillValue} filled
|
||||
* by the user and the expected value predicted by an autofill service.
|
||||
*
|
||||
* <p>A full-match is {@code 1.0} (representing 100%), a full mismatch is {@code 0.0} and
|
||||
* partial mathces are something in between, typically using edit-distance algorithms.
|
||||
* Gets the field classification score of 2 values based on the edit distance between them.
|
||||
*
|
||||
* <p>The score is defined as: @(max_length - edit_distance) / max_length
|
||||
*/
|
||||
public float getScore(@NonNull AutofillValue actualValue, @NonNull String userDataValue) {
|
||||
@VisibleForTesting
|
||||
static float getScore(@Nullable AutofillValue actualValue, @Nullable String userDataValue) {
|
||||
if (actualValue == null || !actualValue.isText() || userDataValue == null) return 0;
|
||||
// TODO(b/70291841): implement edit distance - currently it's returning either 0, 100%, or
|
||||
// partial match when number of chars match
|
||||
final String textValue = actualValue.getTextValue().toString();
|
||||
final int total = textValue.length();
|
||||
if (total != userDataValue.length()) return 0F;
|
||||
|
||||
int matches = 0;
|
||||
for (int i = 0; i < total; i++) {
|
||||
if (Character.toLowerCase(textValue.charAt(i)) == Character
|
||||
.toLowerCase(userDataValue.charAt(i))) {
|
||||
matches++;
|
||||
final String actualValueText = actualValue.getTextValue().toString();
|
||||
final int actualValueLength = actualValueText.length();
|
||||
final int userDatalength = userDataValue.length();
|
||||
if (userDatalength == 0) {
|
||||
return (actualValueLength == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
final int distance = editDistance(actualValueText.toLowerCase(),
|
||||
userDataValue.toLowerCase());
|
||||
final int maxLength = Math.max(actualValueLength, userDatalength);
|
||||
return ((float) maxLength - distance) / maxLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the edit distance (number of insertions, deletions or substitutions to edit one
|
||||
* string into the other) between two strings. In particular, this will compute the Levenshtein
|
||||
* distance.
|
||||
*
|
||||
* <p>See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
|
||||
*
|
||||
* @param s the first string to compare
|
||||
* @param t the second string to compare
|
||||
* @return the edit distance between the two strings
|
||||
*/
|
||||
// Note: copied verbatim from com.android.tools.lint.detector.api.LintUtils.java
|
||||
public static int editDistance(@NonNull String s, @NonNull String t) {
|
||||
return editDistance(s, t, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the edit distance (number of insertions, deletions or substitutions to edit one
|
||||
* string into the other) between two strings. In particular, this will compute the Levenshtein
|
||||
* distance.
|
||||
*
|
||||
* <p>See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
|
||||
*
|
||||
* @param s the first string to compare
|
||||
* @param t the second string to compare
|
||||
* @param max the maximum edit distance that we care about; if for example the string length
|
||||
* delta is greater than this we don't bother computing the exact edit distance since the
|
||||
* caller has indicated they're not interested in the result
|
||||
* @return the edit distance between the two strings, or some other value greater than that if
|
||||
* the edit distance is at least as big as the {@code max} parameter
|
||||
*/
|
||||
// Note: copied verbatim from com.android.tools.lint.detector.api.LintUtils.java
|
||||
private static int editDistance(@NonNull String s, @NonNull String t, int max) {
|
||||
if (s.equals(t)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (Math.abs(s.length() - t.length()) > max) {
|
||||
// The string lengths differ more than the allowed edit distance;
|
||||
// no point in even attempting to compute the edit distance (requires
|
||||
// O(n*m) storage and O(n*m) speed, where n and m are the string lengths)
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
int m = s.length();
|
||||
int n = t.length();
|
||||
int[][] d = new int[m + 1][n + 1];
|
||||
for (int i = 0; i <= m; i++) {
|
||||
d[i][0] = i;
|
||||
}
|
||||
for (int j = 0; j <= n; j++) {
|
||||
d[0][j] = j;
|
||||
}
|
||||
for (int j = 1; j <= n; j++) {
|
||||
for (int i = 1; i <= m; i++) {
|
||||
if (s.charAt(i - 1) == t.charAt(j - 1)) {
|
||||
d[i][j] = d[i - 1][j - 1];
|
||||
} else {
|
||||
int deletion = d[i - 1][j] + 1;
|
||||
int insertion = d[i][j - 1] + 1;
|
||||
int substitution = d[i - 1][j - 1] + 1;
|
||||
d[i][j] = Math.min(deletion, Math.min(insertion, substitution));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ((float) matches) / total;
|
||||
return d[m][n];
|
||||
}
|
||||
/**
|
||||
* Gets the scores in a batch.
|
||||
*/
|
||||
static float[][] getScores(@NonNull List<AutofillValue> actualValues,
|
||||
@NonNull List<String> userDataValues) {
|
||||
final int actualValuesSize = actualValues.size();
|
||||
final int userDataValuesSize = userDataValues.size();
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "getScores() will return a " + actualValuesSize + "x"
|
||||
+ userDataValuesSize + " matrix for " + DEFAULT_ALGORITHM);
|
||||
}
|
||||
final float[][] scores = new float[actualValuesSize][userDataValuesSize];
|
||||
|
||||
for (int i = 0; i < actualValuesSize; i++) {
|
||||
for (int j = 0; j < userDataValuesSize; j++) {
|
||||
final float score = getScore(actualValues.get(i), userDataValues.get(j));
|
||||
scores[i][j] = score;
|
||||
}
|
||||
}
|
||||
return scores;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package android.ext.services.autofill;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
|
||||
import android.view.autofill.AutofillValue;
|
||||
|
||||
/**
|
||||
* Contains the base tests that does not rely on the specific algorithm implementation.
|
||||
*/
|
||||
public class AutofillFieldClassificationServiceImplTest {
|
||||
|
||||
private final AutofillFieldClassificationServiceImpl mService =
|
||||
new AutofillFieldClassificationServiceImpl();
|
||||
|
||||
@Test
|
||||
public void testOnGetScores_nullActualValues() {
|
||||
assertThat(mService.onGetScores(null, null, null, Arrays.asList("whatever"))).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnGetScores_emptyActualValues() {
|
||||
assertThat(mService.onGetScores(null, null, Collections.emptyList(),
|
||||
Arrays.asList("whatever"))).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnGetScores_nullUserDataValues() {
|
||||
assertThat(mService.onGetScores(null, null,
|
||||
Arrays.asList(AutofillValue.forText("whatever")), null)).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnGetScores_emptyUserDataValues() {
|
||||
assertThat(mService.onGetScores(null, null,
|
||||
Arrays.asList(AutofillValue.forText("whatever")), Collections.emptyList()))
|
||||
.isNull();
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017 The Android Open Source Project
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -13,65 +13,109 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package android.ext.services.autofill;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static android.ext.services.autofill.EditDistanceScorer.getScore;
|
||||
import static android.ext.services.autofill.EditDistanceScorer.getScores;
|
||||
import static android.view.autofill.AutofillValue.forText;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static com.google.common.truth.Truth.assertWithMessage;
|
||||
|
||||
import android.support.test.runner.AndroidJUnit4;
|
||||
import android.view.autofill.AutofillValue;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class EditDistanceScorerTest {
|
||||
|
||||
private final EditDistanceScorer mScorer = EditDistanceScorer.getInstance();
|
||||
|
||||
@Test
|
||||
public void testGetScore_nullValue() {
|
||||
assertFloat(mScorer.getScore(null, "D'OH!"), 0);
|
||||
assertFloat(getScore(null, "D'OH!"), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetScore_nonTextValue() {
|
||||
assertFloat(mScorer.getScore(AutofillValue.forToggle(true), "D'OH!"), 0);
|
||||
assertFloat(getScore(AutofillValue.forToggle(true), "D'OH!"), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetScore_nullUserData() {
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("D'OH!"), null), 0);
|
||||
assertFloat(getScore(AutofillValue.forText("D'OH!"), null), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetScore_fullMatch() {
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("D'OH!"), "D'OH!"), 1);
|
||||
assertFloat(getScore(AutofillValue.forText("D'OH!"), "D'OH!"), 1);
|
||||
assertFloat(getScore(AutofillValue.forText(""), ""), 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetScore_fullMatchMixedCase() {
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("D'OH!"), "D'oH!"), 1);
|
||||
assertFloat(getScore(AutofillValue.forText("D'OH!"), "D'oH!"), 1);
|
||||
}
|
||||
|
||||
// TODO(b/70291841): might need to change it once it supports different sizes
|
||||
@Test
|
||||
public void testGetScore_mismatchDifferentSizes() {
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("One"), "MoreThanOne"), 0);
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("MoreThanOne"), "One"), 0);
|
||||
assertFloat(getScore(AutofillValue.forText("X"), "Xy"), 0.50F);
|
||||
assertFloat(getScore(AutofillValue.forText("Xy"), "X"), 0.50F);
|
||||
assertFloat(getScore(AutofillValue.forText("One"), "MoreThanOne"), 0.27F);
|
||||
assertFloat(getScore(AutofillValue.forText("MoreThanOne"), "One"), 0.27F);
|
||||
assertFloat(getScore(AutofillValue.forText("1600 Amphitheatre Parkway"),
|
||||
"1600 Amphitheatre Pkwy"), 0.88F);
|
||||
assertFloat(getScore(AutofillValue.forText("1600 Amphitheatre Pkwy"),
|
||||
"1600 Amphitheatre Parkway"), 0.88F);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetScore_partialMatch() {
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("Dude"), "Dxxx"), 0.25F);
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("Dude"), "DUxx"), 0.50F);
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("Dude"), "DUDx"), 0.75F);
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("Dxxx"), "Dude"), 0.25F);
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("DUxx"), "Dude"), 0.50F);
|
||||
assertFloat(mScorer.getScore(AutofillValue.forText("DUDx"), "Dude"), 0.75F);
|
||||
assertFloat(getScore(AutofillValue.forText("Dude"), "Dxxx"), 0.25F);
|
||||
assertFloat(getScore(AutofillValue.forText("Dude"), "DUxx"), 0.50F);
|
||||
assertFloat(getScore(AutofillValue.forText("Dude"), "DUDx"), 0.75F);
|
||||
assertFloat(getScore(AutofillValue.forText("Dxxx"), "Dude"), 0.25F);
|
||||
assertFloat(getScore(AutofillValue.forText("DUxx"), "Dude"), 0.50F);
|
||||
assertFloat(getScore(AutofillValue.forText("DUDx"), "Dude"), 0.75F);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetScores() {
|
||||
final List<AutofillValue> actualValues = Arrays.asList(forText("A"), forText("b"));
|
||||
final List<String> userDataValues = Arrays.asList("a", "B", "ab", "c");
|
||||
final float[][] expectedScores = new float[][] {
|
||||
new float[] { 1F, 0F, 0.5F, 0F },
|
||||
new float[] { 0F, 1F, 0.5F, 0F }
|
||||
};
|
||||
final float[][] actualScores = getScores(actualValues, userDataValues);
|
||||
|
||||
// Unfortunately, Truth does not have an easy way to compare float matrices and show useful
|
||||
// messages in case of error, so we need to check.
|
||||
assertWithMessage("actual=%s, expected=%s", toString(actualScores),
|
||||
toString(expectedScores)).that(actualScores.length).isEqualTo(2);
|
||||
assertWithMessage("actual=%s, expected=%s", toString(actualScores),
|
||||
toString(expectedScores)).that(actualScores[0].length).isEqualTo(4);
|
||||
assertWithMessage("actual=%s, expected=%s", toString(actualScores),
|
||||
toString(expectedScores)).that(actualScores[1].length).isEqualTo(4);
|
||||
for (int i = 0; i < actualScores.length; i++) {
|
||||
final float[] line = actualScores[i];
|
||||
for (int j = 0; j < line.length; j++) {
|
||||
float cell = line[j];
|
||||
assertWithMessage("wrong score at [%s, %s]", i, j).that(cell).isWithin(0.01F)
|
||||
.of(expectedScores[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void assertFloat(float actualValue, float expectedValue) {
|
||||
assertThat(actualValue).isWithin(1.0e-10f).of(expectedValue);
|
||||
assertThat(actualValue).isWithin(0.01F).of(expectedValue);
|
||||
}
|
||||
|
||||
public static String toString(float[][] matrix) {
|
||||
final StringBuilder string = new StringBuilder("[ ");
|
||||
for (int i = 0; i < matrix.length; i++) {
|
||||
string.append(Arrays.toString(matrix[i])).append(" ");
|
||||
}
|
||||
return string.append(" ]").toString();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user