Merge "Introduce script matching for enabling default IME subtypes." into nyc-dev

This commit is contained in:
Seigo Nonaka
2016-03-07 21:28:53 +00:00
committed by Android (Google) Code Review
2 changed files with 365 additions and 89 deletions

View File

@@ -18,15 +18,17 @@ package com.android.internal.inputmethod;
import com.android.internal.annotations.VisibleForTesting;
import android.annotation.IntRange;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.text.TextUtils;
import android.icu.util.ULocale;
import android.util.LocaleList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
public final class LocaleUtils {
@@ -36,12 +38,138 @@ public final class LocaleUtils {
Locale get(@Nullable T source);
}
@Nullable
private static String getLanguage(@Nullable Locale locale) {
if (locale == null) {
return null;
/**
 * Scores how closely a single supported locale matches a single desired locale.
 *
 * <p>This helper does not compare languages; the caller is expected to have already checked
 * that both locales share the same language, which is why the lowest result is 1. Both
 * arguments are assumed to be fully expanded.</p>
 *
 * @see LocaleUtils#calculateMatchingScore(ULocale, LocaleList, byte[])
 *
 * @param supported The locale supported by IME subtype.
 * @param desired The locale preferred by user.
 * @return 3 if the locales are equal or agree on both script and country, 2 if they agree on
 *         script only, 1 otherwise.
 */
@IntRange(from=1, to=3)
private static byte calculateMatchingSubScore(@NonNull final ULocale supported,
        @NonNull final ULocale desired) {
    // Assuming supported/desired is fully expanded.
    if (supported.equals(desired)) {
        // Exact match.
        return 3;
    }

    // Language matching is skipped here since calculateMatchingScore already did it.
    final String script = supported.getScript();
    final boolean sameScript = !script.isEmpty() && script.equals(desired.getScript());
    if (!sameScript) {
        // TODO: Need subscript matching. For example, Hanb should match with Bopo.
        return 1;
    }

    final String country = supported.getCountry();
    final boolean sameCountry = !country.isEmpty() && country.equals(desired.getCountry());
    if (!sameCountry) {
        return 2;
    }

    // Anything beyond country (e.g. variants, extensions) is ignored.
    return 3;
}
/**
 * Calculates a matching score for the desired locale list.
 *
 * <p>For each desired locale, the supported locale gets a score of 3 if language, script and
 * country all match, 2 if language and script match, 1 if only the language matches, and 0 if
 * the language does not match.</p>
 *
 * @param supported The locale supported by IME subtype.
 * @param desired The locale list preferred by user. Typically system locale list.
 * @param out The output buffer that receives the individual score for each entry of
 *            {@code desired}. The length of {@code out} must be same as the length of the
 *            {@code desired} locale list. It is left untouched when {@code desired} is empty.
 * @return {@code false} if the supported locale doesn't match any locale in {@code desired}
 *         (including when {@code desired} is empty). Otherwise {@code true}.
 */
private static boolean calculateMatchingScore(@NonNull final ULocale supported,
        @NonNull final LocaleList desired, @NonNull byte[] out) {
    if (desired.isEmpty()) {
        return false;
    }

    boolean anyMatch = false;
    final int count = desired.size();
    for (int i = 0; i < count; ++i) {
        final Locale candidate = desired.get(i);
        if (candidate.getLanguage().equals(supported.getLanguage())) {
            // TODO: cache the result of addLikelySubtags if it is slow.
            final ULocale expanded = ULocale.addLikelySubtags(ULocale.forLocale(candidate));
            out[i] = calculateMatchingSubScore(supported, expanded);
            if (out[i] != 0) {
                anyMatch = true;
            }
        } else {
            // Different language: no match for this slot.
            out[i] = 0;
        }
    }
    return anyMatch;
}
private static final class ScoreEntry implements Comparable<ScoreEntry> {
public int mIndex = -1;
@NonNull public final byte[] mScore; // matching score of the i-th system languages.
/**
 * Creates an entry holding a private copy of {@code score} together with {@code index}.
 *
 * @param score The per-locale scores to copy; later changes to the caller's array do not
 *              affect this entry.
 * @param index The index to remember alongside the score.
 */
ScoreEntry(@NonNull byte[] score, int index) {
    // Defensive copy: the entry owns its own array.
    mScore = score.clone();
    mIndex = index;
}
/**
 * Overwrites the stored score and index.
 *
 * <p>Copies the first {@code mScore.length} bytes of {@code score} into this entry's own
 * array, so the caller may keep reusing {@code score} afterwards.</p>
 *
 * @param score The new scores; must be at least as long as the stored score array.
 * @param index The index that goes with {@code score}.
 */
private void set(@NonNull byte[] score, int index) {
    // Bulk copy instead of a hand-written byte loop. If score is too short this fails
    // before any byte is written, so the entry is never left half-updated.
    System.arraycopy(score, 0, mScore, 0, mScore.length);
    mIndex = index;
}
/**
 * Replaces the stored score and index with the given ones when the given score is strictly
 * better than the stored score. Ties keep the existing entry.
 *
 * @param score The candidate score.
 * @param index The index that goes with {@code score}.
 */
public void updateIfBetter(@NonNull byte[] score, int index) {
    final boolean candidateIsBetter = compare(mScore, score) < 0; // mScore < score
    if (!candidateIsBetter) {
        return;
    }
    set(score, index);
}
/**
 * Provides comparison for byte[].
 *
 * <p>The arrays are compared element by element from the front. The first position where
 * the values differ decides the result: the array with the larger value at that position is
 * the larger array. If every value is equal, the arrays are equal.</p>
 *
 * @param left The length must be equal to {@code right}.
 * @param right The length must be equal to {@code left}.
 * @return 1 if {@code left} is larger than {@code right}. -1 if {@code left} is less than
 *         {@code right}. 0 if {@code left} and {@code right} are equal.
 */
@IntRange(from=-1, to=1)
private static int compare(@NonNull byte[] left, @NonNull byte[] right) {
    for (int i = 0; i < left.length; ++i) {
        // Bytes widen to int, so the subtraction cannot overflow.
        final int diff = left[i] - right[i];
        if (diff != 0) {
            return diff > 0 ? 1 : -1;
        }
    }
    return 0;
}
/**
 * Orders entries so that a better (larger) score sorts first.
 *
 * @param other The entry to compare with.
 * @return A negative value if this entry has the better score, a positive value if
 *         {@code other} has the better score, 0 if the scores are equal.
 */
@Override
public int compareTo(final ScoreEntry other) {
    // Negate the natural result to get descending order.
    return -compare(mScore, other.mScore);
}
return locale.getLanguage();
}
/**
@@ -52,14 +180,8 @@ public final class LocaleUtils {
* {@code "en-GB", "ja", "en-AU", "fr-CA", "en-IN"} is specified to {@code preferredLanguages},
* this method tries to copy at most one English locale, at most one Japanese, and at most one
* French locale from {@code source} to {@code dest}. Here the best matching English locale
* will be searched from {@code source} as follows.
* <ol>
* <li>The first instance in {@code sources} that exactly matches {@code "en-GB"}</li>
* <li>The first instance in {@code sources} that exactly matches {@code "en-AU"}</li>
* <li>The first instance in {@code sources} that exactly matches {@code "en-IN"}</li>
* <li>The first instance in {@code sources} that partially matches {@code "en"}</li>
* </ol>
* <p>Then this method iterates the same algorithm for Japanese then French.</p>
* will be searched from {@code source} based on matching score. For the score design, see
* {@link LocaleUtils#calculateMatchingScore(ULocale, LocaleList, byte[])}</p>
*
* @param sources Source items to be filtered.
* @param extractor Type converter from the source items to {@link Locale} object.
@@ -74,69 +196,31 @@ public final class LocaleUtils {
@NonNull LocaleExtractor<T> extractor,
@NonNull LocaleList preferredLanguages,
@NonNull ArrayList<T> dest) {
final Locale[] availableLocales = new Locale[sources.size()];
for (int i = 0; i < availableLocales.length; ++i) {
availableLocales[i] = extractor.get(sources.get(i));
}
final Locale[] sortedPreferredLanguages = new Locale[preferredLanguages.size()];
if (sortedPreferredLanguages.length > 0) {
int nextIndex = 0;
final int N = preferredLanguages.size();
languageLoop:
for (int i = 0; i < N; ++i) {
final String language = getLanguage(preferredLanguages.get(i));
for (int j = 0; j < nextIndex; ++j) {
if (TextUtils.equals(getLanguage(sortedPreferredLanguages[j]), language)) {
continue languageLoop;
}
}
for (int j = i; j < N; ++j) {
final Locale locale = preferredLanguages.get(j);
if (TextUtils.equals(language, getLanguage(locale))) {
sortedPreferredLanguages[nextIndex] = locale;
++nextIndex;
}
}
final HashMap<String, ScoreEntry> scoreboard = new HashMap<>();
final byte[] score = new byte[preferredLanguages.size()];
final int sourceSize = sources.size();
for (int i = 0; i < sourceSize; ++i) {
final Locale locale = extractor.get(sources.get(i));
if (locale == null ||
!calculateMatchingScore(ULocale.addLikelySubtags(ULocale.forLocale(locale)),
preferredLanguages, score)) {
continue;
}
final String lang = locale.getLanguage();
final ScoreEntry bestScore = scoreboard.get(lang);
if (bestScore == null) {
scoreboard.put(lang, new ScoreEntry(score, i));
} else {
bestScore.updateIfBetter(score, i);
}
}
for (int languageIndex = 0; languageIndex < sortedPreferredLanguages.length;) {
// Finding the range.
final String language = getLanguage(sortedPreferredLanguages[languageIndex]);
int nextLanguageIndex = languageIndex;
for (; nextLanguageIndex < sortedPreferredLanguages.length; ++nextLanguageIndex) {
final Locale locale = sortedPreferredLanguages[nextLanguageIndex];
if (!TextUtils.equals(getLanguage(locale), language)) {
break;
}
}
// Check exact match
boolean found = false;
for (int i = languageIndex; !found && i < nextLanguageIndex; ++i) {
final Locale locale = sortedPreferredLanguages[i];
for (int j = 0; j < availableLocales.length; ++j) {
if (!Objects.equals(locale, availableLocales[j])) {
continue;
}
dest.add(sources.get(j));
found = true;
break;
}
}
if (!found) {
// No exact match. Use language match.
for (int j = 0; j < availableLocales.length; ++j) {
if (!TextUtils.equals(language, getLanguage(availableLocales[j]))) {
continue;
}
dest.add(sources.get(j));
break;
}
}
languageIndex = nextLanguageIndex;
final ScoreEntry[] result = scoreboard.values().toArray(new ScoreEntry[scoreboard.size()]);
Arrays.sort(result);
for (final ScoreEntry entry : result) {
dest.add(sources.get(entry.mIndex));
}
}
}
}

View File

@@ -49,6 +49,22 @@ public class LocaleUtilsTest extends InstrumentationTestCase {
assertEquals(0, dest.size());
}
/**
 * Verifies that nothing is copied to the destination when no source locale shares a language
 * with the preferred locale list.
 */
@SmallTest
public void testFilterDoesNotMatchAnything() throws Exception {
    final String[] availableTags = {"en-US", "fr-CA", "in", "ja", "fil"};
    final ArrayList<Locale> availableLocales = new ArrayList<>();
    for (final String tag : availableTags) {
        availableLocales.add(Locale.forLanguageTag(tag));
    }

    // None of the available languages is Chinese.
    final LocaleList preferredLocales = LocaleList.forLanguageTags("zh-Hans-TW");

    final ArrayList<Locale> dest = new ArrayList<>();
    LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
    assertEquals(0, dest.size());
}
@SmallTest
public void testFilterByLanguageEmptySource() throws Exception {
final ArrayList<Locale> availableLocales = new ArrayList<>();
@@ -124,21 +140,36 @@ public class LocaleUtilsTest extends InstrumentationTestCase {
/**
 * Checks the basic behavior of {@code LocaleUtils.filterByLanguage}.
 *
 * <p>Scenario 1: with preferred locales {@code "fr,en-US,ja-JP"}, three sources are expected
 * in the destination, in this order: "fr-CA", "en-US", "ja". Scenario 2: with "en-US" as
 * the only preferred locale and three English sources, only "en-US" is expected.</p>
 *
 * NOTE(review): this span appears to interleave the pre-change and post-change lines of the
 * diff (for example {@code availableLocales} and {@code preferredLocales} are each declared
 * twice) - confirm against the real file before relying on it.
 */
@SmallTest
public void testFilterByLanguage() throws Exception {
final ArrayList<Locale> availableLocales = new ArrayList<>();
availableLocales.add(Locale.forLanguageTag("en-US"));
availableLocales.add(Locale.forLanguageTag("fr-CA"));
availableLocales.add(Locale.forLanguageTag("in"));
availableLocales.add(Locale.forLanguageTag("ja"));
availableLocales.add(Locale.forLanguageTag("fil"));
// Scenario 1: several preferred languages, one result expected per matched language.
{
final ArrayList<Locale> availableLocales = new ArrayList<>();
availableLocales.add(Locale.forLanguageTag("en-US"));
availableLocales.add(Locale.forLanguageTag("fr-CA"));
availableLocales.add(Locale.forLanguageTag("in"));
availableLocales.add(Locale.forLanguageTag("ja"));
availableLocales.add(Locale.forLanguageTag("fil"));
final LocaleList preferredLocales = LocaleList.forLanguageTags("fr,en-US,ja-JP");
final LocaleList preferredLocales = LocaleList.forLanguageTags("fr,en-US,ja-JP");
final ArrayList<Locale> dest = new ArrayList<>();
LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
assertEquals(3, dest.size());
assertEquals(availableLocales.get(1), dest.get(0)); // "fr-CA"
assertEquals(availableLocales.get(0), dest.get(1)); // "en-US"
assertEquals(availableLocales.get(3), dest.get(2)); // "ja"
final ArrayList<Locale> dest = new ArrayList<>();
LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
assertEquals(3, dest.size());
assertEquals(availableLocales.get(1), dest.get(0)); // "fr-CA"
assertEquals(availableLocales.get(0), dest.get(1)); // "en-US"
assertEquals(availableLocales.get(3), dest.get(2)); // "ja"
}
// Scenario 2: several sources share the preferred language; only one is expected.
{
final ArrayList<Locale> availableLocales = new ArrayList<>();
availableLocales.add(Locale.forLanguageTag("en-US"));
availableLocales.add(Locale.forLanguageTag("en-GB"));
availableLocales.add(Locale.forLanguageTag("en-IN"));
final LocaleList preferredLocales = LocaleList.forLanguageTags("en-US");
final ArrayList<Locale> dest = new ArrayList<>();
LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
assertEquals(1, dest.size());
assertEquals(availableLocales.get(0), dest.get(0)); // "en-US"
}
}
@SmallTest
@@ -191,4 +222,165 @@ public class LocaleUtilsTest extends InstrumentationTestCase {
assertEquals(availableLocales.get(1), dest.get(0)); // "en-CA"
}
}
/**
 * Exercises the script/country fallback rules of {@code LocaleUtils.filterByLanguage} with
 * Serbian locales. Every scenario expects exactly one source to survive the filtering.
 */
@SmallTest
public void testFilterByLanguageFallbackRules() throws Exception {
    final String[] fullSerbianTags = {
            "sr-Cyrl-BA", "sr-Cyrl-CS", "sr-Cyrl-ME", "sr-Cyrl-RS",
            "sr-Latn-BA", "sr-Latn-CS", "sr-Latn-ME", "sr-Latn-RS"};
    final String[] fullSerbianTagsWithExtension = {
            "sr-Cyrl-BA-x-android", "sr-Cyrl-CS-x-android",
            "sr-Cyrl-ME-x-android", "sr-Cyrl-RS-x-android",
            "sr-Latn-BA-x-android", "sr-Latn-CS-x-android",
            "sr-Latn-ME-x-android", "sr-Latn-RS-x-android"};

    // Language, script and country all given: the identical source wins.
    assertSingleFilterResult("sr-Latn-RS", 7 /* "sr-Latn-RS" */, fullSerbianTags);

    // A private-use extension on the preferred side does not change the winner.
    assertSingleFilterResult("sr-Latn-RS-x-android", 7 /* "sr-Latn-RS" */, fullSerbianTags);

    // A private-use extension on the source side does not change the winner either.
    assertSingleFilterResult("sr-Latn-RS", 7 /* "sr-Latn-RS-x-android" */,
            fullSerbianTagsWithExtension);

    assertSingleFilterResult("sr-Latn-RS", 2 /* "sr-Latn" */, "sr", "sr-Cyrl", "sr-Latn");

    // "sr" is expected even though "sr-RS" is also available; presumably both expand to the
    // same likely subtags and the earlier source wins the tie.
    assertSingleFilterResult("sr-RS", 0 /* "sr" */, "sr", "sr-RS", "sr-Latn");

    assertSingleFilterResult("sr-Latn", 2 /* "sr-Latn" */, "sr", "sr-RS", "sr-Latn");

    assertSingleFilterResult("sr", 0 /* "sr" */, "sr", "sr-RS", "sr-Latn");

    // Same sources in a different order: now "sr-RS" precedes "sr" and is the one expected.
    assertSingleFilterResult("sr", 1 /* "sr-RS" */, "sr-Latn", "sr-RS", "sr");

    assertSingleFilterResult("sr", 0 /* "sr-Cyrl-RS" */, "sr-Cyrl-RS", "sr-Latn-RS");

    assertSingleFilterResult("sr-Latn", 0 /* "sr-Latn-RS" */, "sr-Latn-RS", "sr-Cyrl-RS");
}

/**
 * Runs {@code LocaleUtils.filterByLanguage} for one scenario and asserts that the destination
 * holds exactly one element, equal to the source at {@code expectedIndex}.
 *
 * @param preferredTags Language tags for the preferred locale list.
 * @param expectedIndex Index into {@code availableTags} of the source expected to be chosen.
 * @param availableTags Language tags of the source locales, in order.
 */
private void assertSingleFilterResult(String preferredTags, int expectedIndex,
        String... availableTags) {
    final LocaleList preferredLocales = LocaleList.forLanguageTags(preferredTags);
    final ArrayList<Locale> availableLocales = new ArrayList<>();
    for (final String tag : availableTags) {
        availableLocales.add(Locale.forLanguageTag(tag));
    }
    final ArrayList<Locale> dest = new ArrayList<>();
    LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
    assertEquals(1, dest.size());
    assertEquals(availableLocales.get(expectedIndex), dest.get(0));
}
/**
 * Pins down the current behavior for script pairs where one script covers another.
 *
 * <p>The expectations below are not the intended behavior; they exist so that the behavior
 * does not silently become worse.</p>
 */
@SmallTest
public void testFilterKnownLimitation() throws Exception {
    // Following test cases are not for intentional behavior but checks for preventing the
    // behavior from becoming worse.
    {
        final LocaleList preferredLocales = LocaleList.forLanguageTags("ja-Hrkt");
        final ArrayList<Locale> availableLocales = new ArrayList<>();
        availableLocales.add(Locale.forLanguageTag("ja-Jpan"));
        availableLocales.add(Locale.forLanguageTag("ja-Hrkt"));
        final ArrayList<Locale> dest = new ArrayList<>();
        LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
        assertEquals(1, dest.size());
        // Ideally this would be ja-Jpan, since it supports ja-Hrkt and is listed before
        // ja-Hrkt. The current behavior asserted here is ja-Hrkt.
        assertEquals(availableLocales.get(1), dest.get(0));
    }
    {
        final LocaleList preferredLocales = LocaleList.forLanguageTags("zh-Hani");
        final ArrayList<Locale> availableLocales = new ArrayList<>();
        availableLocales.add(Locale.forLanguageTag("zh-Hans"));
        availableLocales.add(Locale.forLanguageTag("zh-Hant"));
        availableLocales.add(Locale.forLanguageTag("zh-Hanb"));
        availableLocales.add(Locale.forLanguageTag("zh-Hani"));
        final ArrayList<Locale> dest = new ArrayList<>();
        LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
        assertEquals(1, dest.size());
        // Ideally this would be zh-Hans, since it supports zh-Hani (zh-Hant and zh-Hanb
        // support zh-Hani too). The current behavior asserted here is zh-Hani.
        assertEquals(availableLocales.get(3), dest.get(0));
    }
}
}