Merge "Refactor WordIterator to avoid copying to internal buffers"
This commit is contained in:
committed by
Android (Google) Code Review
commit
ca132ca6a1
108
core/java/android/text/CharSequenceCharacterIterator.java
Normal file
108
core/java/android/text/CharSequenceCharacterIterator.java
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (C) 2016 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package android.text;
|
||||
|
||||
import android.annotation.NonNull;
|
||||
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* An implementation of {@link java.text.CharacterIterator} that iterates over a given CharSequence.
|
||||
* {@hide}
|
||||
*/
|
||||
public class CharSequenceCharacterIterator implements CharacterIterator {
|
||||
private final int mBeginIndex, mEndIndex;
|
||||
private int mIndex;
|
||||
private final CharSequence mCharSeq;
|
||||
|
||||
/**
|
||||
* Constructs the iterator given a CharSequence and a range. The position of the iterator index
|
||||
* is set to the beginning of the range.
|
||||
*/
|
||||
public CharSequenceCharacterIterator(@NonNull CharSequence text, int start, int end) {
|
||||
mCharSeq = text;
|
||||
mBeginIndex = mIndex = start;
|
||||
mEndIndex = end;
|
||||
}
|
||||
|
||||
public char first() {
|
||||
mIndex = mBeginIndex;
|
||||
return current();
|
||||
}
|
||||
|
||||
public char last() {
|
||||
if (mBeginIndex == mEndIndex) {
|
||||
mIndex = mEndIndex;
|
||||
return DONE;
|
||||
} else {
|
||||
mIndex = mEndIndex - 1;
|
||||
return mCharSeq.charAt(mIndex);
|
||||
}
|
||||
}
|
||||
|
||||
public char current() {
|
||||
return (mIndex == mEndIndex) ? DONE : mCharSeq.charAt(mIndex);
|
||||
}
|
||||
|
||||
public char next() {
|
||||
mIndex++;
|
||||
if (mIndex >= mEndIndex) {
|
||||
mIndex = mEndIndex;
|
||||
return DONE;
|
||||
} else {
|
||||
return mCharSeq.charAt(mIndex);
|
||||
}
|
||||
}
|
||||
|
||||
public char previous() {
|
||||
if (mIndex <= mBeginIndex) {
|
||||
return DONE;
|
||||
} else {
|
||||
mIndex--;
|
||||
return mCharSeq.charAt(mIndex);
|
||||
}
|
||||
}
|
||||
|
||||
public char setIndex(int position) {
|
||||
if (mBeginIndex <= position && position <= mEndIndex) {
|
||||
mIndex = position;
|
||||
return current();
|
||||
} else {
|
||||
throw new IllegalArgumentException("invalid position");
|
||||
}
|
||||
}
|
||||
|
||||
public int getBeginIndex() {
|
||||
return mBeginIndex;
|
||||
}
|
||||
|
||||
public int getEndIndex() {
|
||||
return mEndIndex;
|
||||
}
|
||||
|
||||
public int getIndex() {
|
||||
return mIndex;
|
||||
}
|
||||
|
||||
public Object clone() {
|
||||
try {
|
||||
return super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new InternalError();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
/*
|
||||
* Copyright (C) 2011 The Android Open Source Project
|
||||
*
|
||||
@@ -17,10 +16,11 @@
|
||||
|
||||
package android.text.method;
|
||||
|
||||
import android.text.Selection;
|
||||
import android.text.SpannableStringBuilder;
|
||||
|
||||
import android.annotation.NonNull;
|
||||
import android.icu.text.BreakIterator;
|
||||
import android.text.Selection;
|
||||
import android.text.CharSequenceCharacterIterator;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
@@ -35,10 +35,9 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
// Size of the window for the word iterator, should be greater than the longest word's length
|
||||
private static final int WINDOW_WIDTH = 50;
|
||||
|
||||
private String mString;
|
||||
private int mOffsetShift;
|
||||
|
||||
private BreakIterator mIterator;
|
||||
private int mStart, mEnd;
|
||||
private CharSequence mCharSeq;
|
||||
private final BreakIterator mIterator;
|
||||
|
||||
/**
|
||||
* Constructs a WordIterator using the default locale.
|
||||
@@ -49,59 +48,49 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
|
||||
/**
|
||||
* Constructs a new WordIterator for the specified locale.
|
||||
* @param locale The locale to be used when analysing the text.
|
||||
* @param locale The locale to be used for analyzing the text.
|
||||
*/
|
||||
public WordIterator(Locale locale) {
|
||||
mIterator = BreakIterator.getWordInstance(locale);
|
||||
}
|
||||
|
||||
public void setCharSequence(CharSequence charSequence, int start, int end) {
|
||||
mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
|
||||
final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
|
||||
|
||||
if (charSequence instanceof SpannableStringBuilder) {
|
||||
mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
|
||||
public void setCharSequence(@NonNull CharSequence charSequence, int start, int end) {
|
||||
if (0 <= start && end <= charSequence.length()) {
|
||||
mCharSeq = charSequence;
|
||||
mStart = Math.max(0, start - WINDOW_WIDTH);
|
||||
mEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
|
||||
mIterator.setText(new CharSequenceCharacterIterator(charSequence, mStart, mEnd));
|
||||
} else {
|
||||
mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
|
||||
throw new IndexOutOfBoundsException("input indexes are outside the CharSequence");
|
||||
}
|
||||
mIterator.setText(mString);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public int preceding(int offset) {
|
||||
int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
do {
|
||||
shiftedOffset = mIterator.preceding(shiftedOffset);
|
||||
if (shiftedOffset == BreakIterator.DONE) {
|
||||
return BreakIterator.DONE;
|
||||
checkOffsetIsValid(offset);
|
||||
while (true) {
|
||||
offset = mIterator.preceding(offset);
|
||||
if (offset == BreakIterator.DONE || isOnLetterOrDigit(offset)) {
|
||||
return offset;
|
||||
}
|
||||
if (isOnLetterOrDigit(shiftedOffset)) {
|
||||
return shiftedOffset + mOffsetShift;
|
||||
}
|
||||
} while (true);
|
||||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public int following(int offset) {
|
||||
int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
do {
|
||||
shiftedOffset = mIterator.following(shiftedOffset);
|
||||
if (shiftedOffset == BreakIterator.DONE) {
|
||||
return BreakIterator.DONE;
|
||||
checkOffsetIsValid(offset);
|
||||
while (true) {
|
||||
offset = mIterator.following(offset);
|
||||
if (offset == BreakIterator.DONE || isAfterLetterOrDigit(offset)) {
|
||||
return offset;
|
||||
}
|
||||
if (isAfterLetterOrDigit(shiftedOffset)) {
|
||||
return shiftedOffset + mOffsetShift;
|
||||
}
|
||||
} while (true);
|
||||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public boolean isBoundary(int offset) {
|
||||
int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
return mIterator.isBoundary(shiftedOffset);
|
||||
checkOffsetIsValid(offset);
|
||||
return mIterator.isBoundary(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -112,13 +101,8 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @return the position of the first boundary following the given offset.
|
||||
*/
|
||||
public int nextBoundary(int offset) {
|
||||
int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
shiftedOffset = mIterator.following(shiftedOffset);
|
||||
if (shiftedOffset == BreakIterator.DONE) {
|
||||
return BreakIterator.DONE;
|
||||
}
|
||||
return shiftedOffset + mOffsetShift;
|
||||
checkOffsetIsValid(offset);
|
||||
return mIterator.following(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -129,13 +113,8 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @return the position of the last boundary preceding the given offset.
|
||||
*/
|
||||
public int prevBoundary(int offset) {
|
||||
int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
shiftedOffset = mIterator.preceding(shiftedOffset);
|
||||
if (shiftedOffset == BreakIterator.DONE) {
|
||||
return BreakIterator.DONE;
|
||||
}
|
||||
return shiftedOffset + mOffsetShift;
|
||||
checkOffsetIsValid(offset);
|
||||
return mIterator.preceding(offset);
|
||||
}
|
||||
|
||||
/** If <code>offset</code> is within a word, returns the index of the first character of that
|
||||
@@ -228,20 +207,19 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @throws IllegalArgumentException if offset is not valid.
|
||||
*/
|
||||
private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) {
|
||||
final int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
checkOffsetIsValid(offset);
|
||||
|
||||
if (isOnLetterOrDigit(shiftedOffset)) {
|
||||
if (mIterator.isBoundary(shiftedOffset)
|
||||
&& (!isAfterLetterOrDigit(shiftedOffset)
|
||||
if (isOnLetterOrDigit(offset)) {
|
||||
if (mIterator.isBoundary(offset)
|
||||
&& (!isAfterLetterOrDigit(offset)
|
||||
|| !getPrevWordBeginningOnTwoWordsBoundary)) {
|
||||
return shiftedOffset + mOffsetShift;
|
||||
return offset;
|
||||
} else {
|
||||
return mIterator.preceding(shiftedOffset) + mOffsetShift;
|
||||
return mIterator.preceding(offset);
|
||||
}
|
||||
} else {
|
||||
if (isAfterLetterOrDigit(shiftedOffset)) {
|
||||
return mIterator.preceding(shiftedOffset) + mOffsetShift;
|
||||
if (isAfterLetterOrDigit(offset)) {
|
||||
return mIterator.preceding(offset);
|
||||
}
|
||||
}
|
||||
return BreakIterator.DONE;
|
||||
@@ -264,19 +242,18 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @throws IllegalArgumentException if offset is not valid.
|
||||
*/
|
||||
private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) {
|
||||
final int shiftedOffset = offset - mOffsetShift;
|
||||
checkOffsetIsValid(shiftedOffset);
|
||||
checkOffsetIsValid(offset);
|
||||
|
||||
if (isAfterLetterOrDigit(shiftedOffset)) {
|
||||
if (mIterator.isBoundary(shiftedOffset)
|
||||
&& (!isOnLetterOrDigit(shiftedOffset) || !getNextWordEndOnTwoWordBoundary)) {
|
||||
return shiftedOffset + mOffsetShift;
|
||||
if (isAfterLetterOrDigit(offset)) {
|
||||
if (mIterator.isBoundary(offset)
|
||||
&& (!isOnLetterOrDigit(offset) || !getNextWordEndOnTwoWordBoundary)) {
|
||||
return offset;
|
||||
} else {
|
||||
return mIterator.following(shiftedOffset) + mOffsetShift;
|
||||
return mIterator.following(offset);
|
||||
}
|
||||
} else {
|
||||
if (isOnLetterOrDigit(shiftedOffset)) {
|
||||
return mIterator.following(shiftedOffset) + mOffsetShift;
|
||||
if (isOnLetterOrDigit(offset)) {
|
||||
return mIterator.following(offset);
|
||||
}
|
||||
}
|
||||
return BreakIterator.DONE;
|
||||
@@ -290,7 +267,7 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @param offset the offset to search from.
|
||||
*/
|
||||
public int getPunctuationBeginning(int offset) {
|
||||
checkOffsetIsValid(offset - mOffsetShift);
|
||||
checkOffsetIsValid(offset);
|
||||
while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) {
|
||||
offset = prevBoundary(offset);
|
||||
}
|
||||
@@ -306,7 +283,7 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @param offset the offset to search from.
|
||||
*/
|
||||
public int getPunctuationEnd(int offset) {
|
||||
checkOffsetIsValid(offset - mOffsetShift);
|
||||
checkOffsetIsValid(offset);
|
||||
while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) {
|
||||
offset = nextBoundary(offset);
|
||||
}
|
||||
@@ -322,9 +299,8 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @return Whether the offset is after a punctuation character.
|
||||
*/
|
||||
public boolean isAfterPunctuation(int offset) {
|
||||
final int shiftedOffset = offset - mOffsetShift;
|
||||
if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
|
||||
final int codePoint = mString.codePointBefore(shiftedOffset);
|
||||
if (mStart < offset && offset <= mEnd) {
|
||||
final int codePoint = Character.codePointBefore(mCharSeq, offset);
|
||||
return isPunctuation(codePoint);
|
||||
}
|
||||
return false;
|
||||
@@ -338,9 +314,8 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
* @return Whether the offset is at a punctuation character.
|
||||
*/
|
||||
public boolean isOnPunctuation(int offset) {
|
||||
final int shiftedOffset = offset - mOffsetShift;
|
||||
if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
|
||||
final int codePoint = mString.codePointAt(shiftedOffset);
|
||||
if (mStart <= offset && offset < mEnd) {
|
||||
final int codePoint = Character.codePointAt(mCharSeq, offset);
|
||||
return isPunctuation(codePoint);
|
||||
}
|
||||
return false;
|
||||
@@ -354,8 +329,8 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
return !isOnPunctuation(offset) && isAfterPunctuation(offset);
|
||||
}
|
||||
|
||||
private boolean isPunctuation(int cp) {
|
||||
int type = Character.getType(cp);
|
||||
private static boolean isPunctuation(int cp) {
|
||||
final int type = Character.getType(cp);
|
||||
return (type == Character.CONNECTOR_PUNCTUATION ||
|
||||
type == Character.DASH_PUNCTUATION ||
|
||||
type == Character.END_PUNCTUATION ||
|
||||
@@ -365,27 +340,26 @@ public class WordIterator implements Selection.PositionIterator {
|
||||
type == Character.START_PUNCTUATION);
|
||||
}
|
||||
|
||||
private boolean isAfterLetterOrDigit(int shiftedOffset) {
|
||||
if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
|
||||
final int codePoint = mString.codePointBefore(shiftedOffset);
|
||||
private boolean isAfterLetterOrDigit(int offset) {
|
||||
if (mStart < offset && offset <= mEnd) {
|
||||
final int codePoint = Character.codePointBefore(mCharSeq, offset);
|
||||
if (Character.isLetterOrDigit(codePoint)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isOnLetterOrDigit(int shiftedOffset) {
|
||||
if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
|
||||
final int codePoint = mString.codePointAt(shiftedOffset);
|
||||
private boolean isOnLetterOrDigit(int offset) {
|
||||
if (mStart <= offset && offset < mEnd) {
|
||||
final int codePoint = Character.codePointAt(mCharSeq, offset);
|
||||
if (Character.isLetterOrDigit(codePoint)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void checkOffsetIsValid(int shiftedOffset) {
|
||||
if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
|
||||
throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
|
||||
". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
|
||||
"]");
|
||||
private void checkOffsetIsValid(int offset) {
|
||||
if (!(mStart <= offset && offset <= mEnd)) {
|
||||
throw new IllegalArgumentException("Invalid offset: " + (offset) +
|
||||
". Valid range is [" + mStart + ", " + mEnd + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +48,23 @@ public class WordIteratorTest extends AndroidTestCase {
|
||||
wordIterator.setCharSequence(text, text.length(), text.length());
|
||||
}
|
||||
|
||||
@SmallTest
|
||||
public void testWindowWidth() {
|
||||
final String text = "aaaa bbbb cccc dddd eeee ffff gggg hhhh iiii jjjj kkkk llll mmmm nnnn";
|
||||
WordIterator wordIterator = new WordIterator(Locale.ENGLISH);
|
||||
|
||||
// The first 'n' is more than 50 characters into the string.
|
||||
wordIterator.setCharSequence(text, text.indexOf('n'), text.length());
|
||||
final int expectedWindowStart = text.indexOf('n') - 50;
|
||||
assertEquals(expectedWindowStart, wordIterator.preceding(expectedWindowStart + 1));
|
||||
assertEquals(BreakIterator.DONE, wordIterator.preceding(expectedWindowStart));
|
||||
|
||||
wordIterator.setCharSequence(text, 0, 1);
|
||||
final int expectedWindowEnd = 1 + 50;
|
||||
assertEquals(expectedWindowEnd, wordIterator.following(expectedWindowEnd - 1));
|
||||
assertEquals(BreakIterator.DONE, wordIterator.following(expectedWindowEnd));
|
||||
}
|
||||
|
||||
@SmallTest
|
||||
public void testPreceding() {
|
||||
final String text = "abc def-ghi. jkl";
|
||||
@@ -73,6 +90,19 @@ public class WordIteratorTest extends AndroidTestCase {
|
||||
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('h')));
|
||||
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('j')));
|
||||
assertEquals(text.indexOf('j'), wordIterator.preceding(text.indexOf('l')));
|
||||
|
||||
// The results should be the same even if we set a smaller window, since WordIterator
|
||||
// enlarges the window by 50 code units on each side anyway.
|
||||
wordIterator.setCharSequence(text, text.indexOf('d'), text.indexOf('e'));
|
||||
|
||||
assertEquals(BreakIterator.DONE, wordIterator.preceding(text.indexOf('a')));
|
||||
assertEquals(text.indexOf('a'), wordIterator.preceding(text.indexOf('c')));
|
||||
assertEquals(text.indexOf('a'), wordIterator.preceding(text.indexOf('d')));
|
||||
assertEquals(text.indexOf('d'), wordIterator.preceding(text.indexOf('e')));
|
||||
assertEquals(text.indexOf('d'), wordIterator.preceding(text.indexOf('g')));
|
||||
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('h')));
|
||||
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('j')));
|
||||
assertEquals(text.indexOf('j'), wordIterator.preceding(text.indexOf('l')));
|
||||
}
|
||||
|
||||
@SmallTest
|
||||
@@ -100,6 +130,19 @@ public class WordIteratorTest extends AndroidTestCase {
|
||||
assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('g')));
|
||||
assertEquals(text.length(), wordIterator.following(text.indexOf('j')));
|
||||
assertEquals(BreakIterator.DONE, wordIterator.following(text.length()));
|
||||
|
||||
// The results should be the same even if we set a smaller window, since WordIterator
|
||||
// enlarges the window by 50 code units on each side anyway.
|
||||
wordIterator.setCharSequence(text, text.indexOf('d'), text.indexOf('e'));
|
||||
|
||||
assertEquals(text.indexOf('c') + 1, wordIterator.following(text.indexOf('a')));
|
||||
assertEquals(text.indexOf('c') + 1, wordIterator.following(text.indexOf('c')));
|
||||
assertEquals(text.indexOf('f') + 1, wordIterator.following(text.indexOf('c') + 1));
|
||||
assertEquals(text.indexOf('f') + 1, wordIterator.following(text.indexOf('d')));
|
||||
assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('-')));
|
||||
assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('g')));
|
||||
assertEquals(text.length(), wordIterator.following(text.indexOf('j')));
|
||||
assertEquals(BreakIterator.DONE, wordIterator.following(text.length()));
|
||||
}
|
||||
|
||||
@SmallTest
|
||||
|
||||
Reference in New Issue
Block a user