Merge "Refactor WordIterator to avoid copying to internal buffers"

This commit is contained in:
TreeHugger Robot
2016-07-01 00:03:57 +00:00
committed by Android (Google) Code Review
3 changed files with 218 additions and 93 deletions

View File

@@ -0,0 +1,108 @@
/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.text;
import android.annotation.NonNull;
import java.text.CharacterIterator;
/**
 * An implementation of {@link java.text.CharacterIterator} that iterates over a given
 * CharSequence.
 *
 * <p>Characters are read straight from the backing CharSequence with {@code charAt}; nothing is
 * copied. The iterator covers the half-open range {@code [start, end)} supplied at construction
 * time, and every index it accepts or reports is an index into the CharSequence itself (not an
 * offset relative to {@code start}).
 * {@hide}
 */
public class CharSequenceCharacterIterator implements CharacterIterator {
    private final int mBeginIndex, mEndIndex;
    private int mIndex;
    private final CharSequence mCharSeq;

    /**
     * Constructs the iterator given a CharSequence and a range. The position of the iterator index
     * is set to the beginning of the range.
     *
     * <p>The range is stored as given and is not validated here; callers are expected to pass
     * {@code start <= end} with both values inside the CharSequence.
     *
     * @param text the CharSequence to iterate over.
     * @param start index of the first character of the range (inclusive).
     * @param end index just past the last character of the range (exclusive).
     */
    public CharSequenceCharacterIterator(@NonNull CharSequence text, int start, int end) {
        mCharSeq = text;
        mBeginIndex = mIndex = start;
        mEndIndex = end;
    }

    /**
     * Moves the index to the beginning of the range.
     *
     * @return the character at the beginning of the range, or {@code DONE} if the range is empty.
     */
    @Override
    public char first() {
        mIndex = mBeginIndex;
        return current();
    }

    /**
     * Moves the index to the last character of the range, or to the end index if the range is
     * empty.
     *
     * @return the last character of the range, or {@code DONE} if the range is empty.
     */
    @Override
    public char last() {
        if (mBeginIndex == mEndIndex) {
            mIndex = mEndIndex;
            return DONE;
        } else {
            mIndex = mEndIndex - 1;
            return mCharSeq.charAt(mIndex);
        }
    }

    /**
     * Returns the character at the current index without moving it.
     *
     * @return the current character, or {@code DONE} if the index is at the end of the range.
     */
    @Override
    public char current() {
        return (mIndex == mEndIndex) ? DONE : mCharSeq.charAt(mIndex);
    }

    /**
     * Advances the index by one, stopping at the end index.
     *
     * @return the character at the new index, or {@code DONE} if the end of the range was reached.
     */
    @Override
    public char next() {
        mIndex++;
        if (mIndex >= mEndIndex) {
            // Clamp so that repeated calls past the end leave the index at the end index.
            mIndex = mEndIndex;
            return DONE;
        } else {
            return mCharSeq.charAt(mIndex);
        }
    }

    /**
     * Moves the index back by one unless it is already at the beginning of the range.
     *
     * @return the character at the new index, or {@code DONE} (index unchanged) if the index was
     *         already at the beginning of the range.
     */
    @Override
    public char previous() {
        if (mIndex <= mBeginIndex) {
            return DONE;
        } else {
            mIndex--;
            return mCharSeq.charAt(mIndex);
        }
    }

    /**
     * Moves the index to the given position.
     *
     * @param position the new index; must lie in {@code [getBeginIndex(), getEndIndex()]}.
     * @return the character at the new index, or {@code DONE} if it is the end index.
     * @throws IllegalArgumentException if {@code position} is outside the range.
     */
    @Override
    public char setIndex(int position) {
        if (mBeginIndex <= position && position <= mEndIndex) {
            mIndex = position;
            return current();
        } else {
            throw new IllegalArgumentException("invalid position");
        }
    }

    /** Returns the start (inclusive) of the range, as an index into the CharSequence. */
    @Override
    public int getBeginIndex() {
        return mBeginIndex;
    }

    /** Returns the end (exclusive) of the range, as an index into the CharSequence. */
    @Override
    public int getEndIndex() {
        return mEndIndex;
    }

    /** Returns the current index, as an index into the CharSequence. */
    @Override
    public int getIndex() {
        return mIndex;
    }

    /**
     * Returns a field-by-field copy of this iterator. The copy refers to the same CharSequence
     * instance and starts at the same index, but moves independently afterwards.
     */
    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Not expected, since CharacterIterator extends Cloneable. Attach the cause anyway so
            // the failure is diagnosable if it ever happens.
            final InternalError error = new InternalError();
            error.initCause(e);
            throw error;
        }
    }
}

View File

@@ -1,4 +1,3 @@
/*
* Copyright (C) 2011 The Android Open Source Project
*
@@ -17,10 +16,11 @@
package android.text.method;
import android.text.Selection;
import android.text.SpannableStringBuilder;
import android.annotation.NonNull;
import android.icu.text.BreakIterator;
import android.text.Selection;
import android.text.CharSequenceCharacterIterator;
import java.util.Locale;
/**
@@ -35,10 +35,9 @@ public class WordIterator implements Selection.PositionIterator {
// Size of the window for the word iterator, should be greater than the longest word's length
private static final int WINDOW_WIDTH = 50;
private String mString;
private int mOffsetShift;
private BreakIterator mIterator;
private int mStart, mEnd;
private CharSequence mCharSeq;
private final BreakIterator mIterator;
/**
* Constructs a WordIterator using the default locale.
@@ -49,59 +48,49 @@ public class WordIterator implements Selection.PositionIterator {
/**
* Constructs a new WordIterator for the specified locale.
* @param locale The locale to be used when analysing the text.
* @param locale The locale to be used for analyzing the text.
*/
public WordIterator(Locale locale) {
mIterator = BreakIterator.getWordInstance(locale);
}
public void setCharSequence(CharSequence charSequence, int start, int end) {
mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
if (charSequence instanceof SpannableStringBuilder) {
mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
public void setCharSequence(@NonNull CharSequence charSequence, int start, int end) {
if (0 <= start && end <= charSequence.length()) {
mCharSeq = charSequence;
mStart = Math.max(0, start - WINDOW_WIDTH);
mEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);
mIterator.setText(new CharSequenceCharacterIterator(charSequence, mStart, mEnd));
} else {
mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
throw new IndexOutOfBoundsException("input indexes are outside the CharSequence");
}
mIterator.setText(mString);
}
/** {@inheritDoc} */
public int preceding(int offset) {
int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
do {
shiftedOffset = mIterator.preceding(shiftedOffset);
if (shiftedOffset == BreakIterator.DONE) {
return BreakIterator.DONE;
checkOffsetIsValid(offset);
while (true) {
offset = mIterator.preceding(offset);
if (offset == BreakIterator.DONE || isOnLetterOrDigit(offset)) {
return offset;
}
if (isOnLetterOrDigit(shiftedOffset)) {
return shiftedOffset + mOffsetShift;
}
} while (true);
}
}
/** {@inheritDoc} */
public int following(int offset) {
int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
do {
shiftedOffset = mIterator.following(shiftedOffset);
if (shiftedOffset == BreakIterator.DONE) {
return BreakIterator.DONE;
checkOffsetIsValid(offset);
while (true) {
offset = mIterator.following(offset);
if (offset == BreakIterator.DONE || isAfterLetterOrDigit(offset)) {
return offset;
}
if (isAfterLetterOrDigit(shiftedOffset)) {
return shiftedOffset + mOffsetShift;
}
} while (true);
}
}
/** {@inheritDoc} */
public boolean isBoundary(int offset) {
int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
return mIterator.isBoundary(shiftedOffset);
checkOffsetIsValid(offset);
return mIterator.isBoundary(offset);
}
/**
@@ -112,13 +101,8 @@ public class WordIterator implements Selection.PositionIterator {
* @return the position of the first boundary following the given offset.
*/
public int nextBoundary(int offset) {
int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
shiftedOffset = mIterator.following(shiftedOffset);
if (shiftedOffset == BreakIterator.DONE) {
return BreakIterator.DONE;
}
return shiftedOffset + mOffsetShift;
checkOffsetIsValid(offset);
return mIterator.following(offset);
}
/**
@@ -129,13 +113,8 @@ public class WordIterator implements Selection.PositionIterator {
* @return the position of the last boundary preceding the given offset.
*/
public int prevBoundary(int offset) {
int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
shiftedOffset = mIterator.preceding(shiftedOffset);
if (shiftedOffset == BreakIterator.DONE) {
return BreakIterator.DONE;
}
return shiftedOffset + mOffsetShift;
checkOffsetIsValid(offset);
return mIterator.preceding(offset);
}
/** If <code>offset</code> is within a word, returns the index of the first character of that
@@ -228,20 +207,19 @@ public class WordIterator implements Selection.PositionIterator {
* @throws IllegalArgumentException if offset is not valid.
*/
private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) {
final int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
checkOffsetIsValid(offset);
if (isOnLetterOrDigit(shiftedOffset)) {
if (mIterator.isBoundary(shiftedOffset)
&& (!isAfterLetterOrDigit(shiftedOffset)
if (isOnLetterOrDigit(offset)) {
if (mIterator.isBoundary(offset)
&& (!isAfterLetterOrDigit(offset)
|| !getPrevWordBeginningOnTwoWordsBoundary)) {
return shiftedOffset + mOffsetShift;
return offset;
} else {
return mIterator.preceding(shiftedOffset) + mOffsetShift;
return mIterator.preceding(offset);
}
} else {
if (isAfterLetterOrDigit(shiftedOffset)) {
return mIterator.preceding(shiftedOffset) + mOffsetShift;
if (isAfterLetterOrDigit(offset)) {
return mIterator.preceding(offset);
}
}
return BreakIterator.DONE;
@@ -264,19 +242,18 @@ public class WordIterator implements Selection.PositionIterator {
* @throws IllegalArgumentException if offset is not valid.
*/
private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) {
final int shiftedOffset = offset - mOffsetShift;
checkOffsetIsValid(shiftedOffset);
checkOffsetIsValid(offset);
if (isAfterLetterOrDigit(shiftedOffset)) {
if (mIterator.isBoundary(shiftedOffset)
&& (!isOnLetterOrDigit(shiftedOffset) || !getNextWordEndOnTwoWordBoundary)) {
return shiftedOffset + mOffsetShift;
if (isAfterLetterOrDigit(offset)) {
if (mIterator.isBoundary(offset)
&& (!isOnLetterOrDigit(offset) || !getNextWordEndOnTwoWordBoundary)) {
return offset;
} else {
return mIterator.following(shiftedOffset) + mOffsetShift;
return mIterator.following(offset);
}
} else {
if (isOnLetterOrDigit(shiftedOffset)) {
return mIterator.following(shiftedOffset) + mOffsetShift;
if (isOnLetterOrDigit(offset)) {
return mIterator.following(offset);
}
}
return BreakIterator.DONE;
@@ -290,7 +267,7 @@ public class WordIterator implements Selection.PositionIterator {
* @param offset the offset to search from.
*/
public int getPunctuationBeginning(int offset) {
checkOffsetIsValid(offset - mOffsetShift);
checkOffsetIsValid(offset);
while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) {
offset = prevBoundary(offset);
}
@@ -306,7 +283,7 @@ public class WordIterator implements Selection.PositionIterator {
* @param offset the offset to search from.
*/
public int getPunctuationEnd(int offset) {
checkOffsetIsValid(offset - mOffsetShift);
checkOffsetIsValid(offset);
while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) {
offset = nextBoundary(offset);
}
@@ -322,9 +299,8 @@ public class WordIterator implements Selection.PositionIterator {
* @return Whether the offset is after a punctuation character.
*/
public boolean isAfterPunctuation(int offset) {
final int shiftedOffset = offset - mOffsetShift;
if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
final int codePoint = mString.codePointBefore(shiftedOffset);
if (mStart < offset && offset <= mEnd) {
final int codePoint = Character.codePointBefore(mCharSeq, offset);
return isPunctuation(codePoint);
}
return false;
@@ -338,9 +314,8 @@ public class WordIterator implements Selection.PositionIterator {
* @return Whether the offset is at a punctuation character.
*/
public boolean isOnPunctuation(int offset) {
final int shiftedOffset = offset - mOffsetShift;
if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
final int codePoint = mString.codePointAt(shiftedOffset);
if (mStart <= offset && offset < mEnd) {
final int codePoint = Character.codePointAt(mCharSeq, offset);
return isPunctuation(codePoint);
}
return false;
@@ -354,8 +329,8 @@ public class WordIterator implements Selection.PositionIterator {
return !isOnPunctuation(offset) && isAfterPunctuation(offset);
}
private boolean isPunctuation(int cp) {
int type = Character.getType(cp);
private static boolean isPunctuation(int cp) {
final int type = Character.getType(cp);
return (type == Character.CONNECTOR_PUNCTUATION ||
type == Character.DASH_PUNCTUATION ||
type == Character.END_PUNCTUATION ||
@@ -365,27 +340,26 @@ public class WordIterator implements Selection.PositionIterator {
type == Character.START_PUNCTUATION);
}
private boolean isAfterLetterOrDigit(int shiftedOffset) {
if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
final int codePoint = mString.codePointBefore(shiftedOffset);
private boolean isAfterLetterOrDigit(int offset) {
if (mStart < offset && offset <= mEnd) {
final int codePoint = Character.codePointBefore(mCharSeq, offset);
if (Character.isLetterOrDigit(codePoint)) return true;
}
return false;
}
private boolean isOnLetterOrDigit(int shiftedOffset) {
if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
final int codePoint = mString.codePointAt(shiftedOffset);
private boolean isOnLetterOrDigit(int offset) {
if (mStart <= offset && offset < mEnd) {
final int codePoint = Character.codePointAt(mCharSeq, offset);
if (Character.isLetterOrDigit(codePoint)) return true;
}
return false;
}
private void checkOffsetIsValid(int shiftedOffset) {
if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
"]");
private void checkOffsetIsValid(int offset) {
if (!(mStart <= offset && offset <= mEnd)) {
throw new IllegalArgumentException("Invalid offset: " + (offset) +
". Valid range is [" + mStart + ", " + mEnd + "]");
}
}
}

View File

@@ -48,6 +48,23 @@ public class WordIteratorTest extends AndroidTestCase {
wordIterator.setCharSequence(text, text.length(), text.length());
}
@SmallTest
public void testWindowWidth() {
    final String text = "aaaa bbbb cccc dddd eeee ffff gggg hhhh iiii jjjj kkkk llll mmmm nnnn";
    // The first 'n' is more than 50 characters into the string.
    final int firstN = text.indexOf('n');
    final WordIterator iterator = new WordIterator(Locale.ENGLISH);

    // Leading edge: the window is expected to begin 50 code units before the requested start,
    // so nothing can be found before that point.
    iterator.setCharSequence(text, firstN, text.length());
    final int windowStart = firstN - 50;
    assertEquals(windowStart, iterator.preceding(windowStart + 1));
    assertEquals(BreakIterator.DONE, iterator.preceding(windowStart));

    // Trailing edge: the window is expected to finish 50 code units after the requested end,
    // so nothing can be found after that point.
    iterator.setCharSequence(text, 0, 1);
    final int windowEnd = 1 + 50;
    assertEquals(windowEnd, iterator.following(windowEnd - 1));
    assertEquals(BreakIterator.DONE, iterator.following(windowEnd));
}
@SmallTest
public void testPreceding() {
final String text = "abc def-ghi. jkl";
@@ -73,6 +90,19 @@ public class WordIteratorTest extends AndroidTestCase {
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('h')));
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('j')));
assertEquals(text.indexOf('j'), wordIterator.preceding(text.indexOf('l')));
// The results should be the same even if we set a smaller window, since WordIterator
// enlarges the window by 50 code units on each side anyway.
wordIterator.setCharSequence(text, text.indexOf('d'), text.indexOf('e'));
assertEquals(BreakIterator.DONE, wordIterator.preceding(text.indexOf('a')));
assertEquals(text.indexOf('a'), wordIterator.preceding(text.indexOf('c')));
assertEquals(text.indexOf('a'), wordIterator.preceding(text.indexOf('d')));
assertEquals(text.indexOf('d'), wordIterator.preceding(text.indexOf('e')));
assertEquals(text.indexOf('d'), wordIterator.preceding(text.indexOf('g')));
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('h')));
assertEquals(text.indexOf('g'), wordIterator.preceding(text.indexOf('j')));
assertEquals(text.indexOf('j'), wordIterator.preceding(text.indexOf('l')));
}
@SmallTest
@@ -100,6 +130,19 @@ public class WordIteratorTest extends AndroidTestCase {
assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('g')));
assertEquals(text.length(), wordIterator.following(text.indexOf('j')));
assertEquals(BreakIterator.DONE, wordIterator.following(text.length()));
// The results should be the same even if we set a smaller window, since WordIterator
// enlarges the window by 50 code units on each side anyway.
wordIterator.setCharSequence(text, text.indexOf('d'), text.indexOf('e'));
assertEquals(text.indexOf('c') + 1, wordIterator.following(text.indexOf('a')));
assertEquals(text.indexOf('c') + 1, wordIterator.following(text.indexOf('c')));
assertEquals(text.indexOf('f') + 1, wordIterator.following(text.indexOf('c') + 1));
assertEquals(text.indexOf('f') + 1, wordIterator.following(text.indexOf('d')));
assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('-')));
assertEquals(text.indexOf('i') + 1, wordIterator.following(text.indexOf('g')));
assertEquals(text.length(), wordIterator.following(text.indexOf('j')));
assertEquals(BreakIterator.DONE, wordIterator.following(text.length()));
}
@SmallTest