Merge "ICU format support for pseudolocalizes."

This commit is contained in:
Narayan Kamath
2015-06-09 10:29:03 +00:00
committed by Gerrit Code Review
5 changed files with 419 additions and 67 deletions

View File

@@ -50,9 +50,11 @@ aaptSources := \
aaptTests := \
tests/AaptConfig_test.cpp \
tests/AaptGroupEntry_test.cpp \
tests/Pseudolocales_test.cpp \
tests/ResourceFilter_test.cpp
aaptCIncludes := \
system/core/base/include \
external/libpng \
external/zlib
@@ -99,7 +101,6 @@ LOCAL_SRC_FILES := $(aaptSources)
include $(BUILD_HOST_STATIC_LIBRARY)
# ==========================================================
# Build the host executable: aapt
# ==========================================================

View File

@@ -213,16 +213,14 @@ status_t parseStyledString(Bundle* /* bundle */,
Vector<StringPool::entry_style_span> spanStack;
String16 curString;
String16 rawString;
Pseudolocalizer pseudo(pseudolocalize);
const char* errorMsg;
int xliffDepth = 0;
bool firstTime = true;
size_t len;
ResXMLTree::event_code_t code;
// Bracketing if pseudolocalization accented method specified.
if (pseudolocalize == PSEUDO_ACCENTED) {
curString.append(String16(String8("[")));
}
curString.append(pseudo.start());
while ((code=inXml->next()) != ResXMLTree::END_DOCUMENT && code != ResXMLTree::BAD_DOCUMENT) {
if (code == ResXMLTree::TEXT) {
String16 text(inXml->getText(&len));
@@ -231,18 +229,12 @@ status_t parseStyledString(Bundle* /* bundle */,
if (text.string()[0] == '@') {
// If this is a resource reference, don't do the pseudoloc.
pseudolocalize = NO_PSEUDOLOCALIZATION;
pseudo.setMethod(pseudolocalize);
curString = String16();
}
}
if (xliffDepth == 0 && pseudolocalize > 0) {
String16 pseudo;
if (pseudolocalize == PSEUDO_ACCENTED) {
pseudo = pseudolocalize_string(text);
} else if (pseudolocalize == PSEUDO_BIDI) {
pseudo = pseudobidi_string(text);
} else {
pseudo = text;
}
curString.append(pseudo);
curString.append(pseudo.text(text));
} else {
if (isFormatted && hasSubstitutionErrors(fileName, inXml, text) != NO_ERROR) {
return UNKNOWN_ERROR;
@@ -382,24 +374,7 @@ moveon:
}
}
// Bracketing if pseudolocalization accented method specified.
if (pseudolocalize == PSEUDO_ACCENTED) {
const char16_t* str = outString->string();
const char16_t* p = str;
const char16_t* e = p + outString->size();
int words_cnt = 0;
while (p < e) {
if (isspace(*p)) {
words_cnt++;
}
p++;
}
unsigned int length = words_cnt > 3 ? outString->size() :
outString->size() / 2;
curString.append(String16(String8(" ")));
curString.append(pseudo_generate_expansion(length));
curString.append(String16(String8("]")));
}
curString.append(pseudo.end());
if (code == ResXMLTree::BAD_DOCUMENT) {
SourcePos(String8(fileName), inXml->getLineNumber()).error(

View File

@@ -16,6 +16,80 @@ static const String16 k_pdf = String16("\xE2\x80\xac");
static const String16 k_placeholder_open = String16("\xc2\xbb");
static const String16 k_placeholder_close = String16("\xc2\xab");
static const char16_t k_arg_start = '{';
static const char16_t k_arg_end = '}';
// Builds a localizer for method |m|. The implementation pointer starts out
// null so that setMethod() has nothing to release on its first call.
Pseudolocalizer::Pseudolocalizer(PseudolocalizationMethod m)
    : mImpl(nullptr),
      mLastDepth(0)
{
    setMethod(m);
}
// Replaces the active pseudolocalization strategy with the one matching |m|.
// Any method other than PSEUDO_ACCENTED or PSEUDO_BIDI selects the
// pass-through implementation. The brace depth tracked in mLastDepth is left
// untouched.
void Pseudolocalizer::setMethod(PseudolocalizationMethod m) {
    // Deleting a null pointer is a no-op, so the very first call is safe too.
    delete mImpl;

    switch (m) {
        case PSEUDO_ACCENTED:
            mImpl = new PseudoMethodAccent();
            break;
        case PSEUDO_BIDI:
            mImpl = new PseudoMethodBidi();
            break;
        default:
            mImpl = new PseudoMethodNone();
            break;
    }
}
/**
 * Pseudolocalizes one fragment of a message that may use ICU-style
 * "{argument}" syntax.
 *
 * The brace nesting depth is kept in mLastDepth between calls, so a message
 * may be fed in as several consecutive fragments. Even depths are treated as
 * translatable text and passed to the active method's text(); odd depths are
 * treated as argument syntax and copied through, except that a chunk that
 * both starts with '{' and ends with '}' is passed to placeholder().
 *
 * A single quote makes the following character opaque to brace counting.
 * The quote state is local to one call and is not carried across fragments.
 *
 * @param text the fragment to process.
 * @return the processed fragment.
 */
String16 Pseudolocalizer::text(const String16& text) {
    String16 out;
    size_t depth = mLastDepth;
    size_t lastpos, pos;
    const size_t length = text.size();
    const char16_t* str = text.string();
    bool escaped = false;
    for (lastpos = pos = 0; pos < length; pos++) {
        char16_t c = str[pos];
        if (escaped) {
            // Character following a quote: never counted as a brace.
            escaped = false;
            continue;
        }
        if (c == '\'') {
            escaped = true;
            continue;
        }

        if (c == k_arg_start) {
            depth++;
        } else if (c == k_arg_end && depth) {
            depth--;
        }

        // Emit the pending chunk whenever the depth changes or the fragment ends.
        if (mLastDepth != depth || pos == length - 1) {
            bool pseudo = ((mLastDepth % 2) == 0);
            size_t nextpos = pos;
            // Translatable chunks stop before the brace that changed the depth;
            // syntax chunks (and end-of-fragment flushes) include the current char.
            if (!pseudo || depth == mLastDepth) {
                nextpos++;
            }
            size_t size = nextpos - lastpos;
            if (size) {
                String16 chunk = String16(text, size, lastpos);
                if (pseudo) {
                    chunk = mImpl->text(chunk);
                } else if (str[lastpos] == k_arg_start &&
                           str[nextpos - 1] == k_arg_end) {
                    chunk = mImpl->placeholder(chunk);
                }
                out.append(chunk);
            }
            if (pseudo && depth < mLastDepth) { // End of message
                out.append(mImpl->end());
            } else if (!pseudo && depth > mLastDepth) { // Start of message
                out.append(mImpl->start());
            }
            lastpos = nextpos;
            mLastDepth = depth;
        }
    }

    // The loop can finish with characters still pending: when the fragment ends
    // with a quote or a quoted character (both skip the flush above), or when
    // its last character is a brace that was held back from a translatable
    // chunk. Emit that tail instead of silently dropping it. A tail at an odd
    // depth can never be a complete placeholder (its closing brace would have
    // been flushed in the loop), so it is copied through verbatim.
    if (lastpos < length) {
        String16 chunk = String16(text, length - lastpos, lastpos);
        if ((mLastDepth % 2) == 0) {
            chunk = mImpl->text(chunk);
        }
        out.append(chunk);
    }
    return out;
}
static const char*
pseudolocalize_char(const char16_t c)
{
@@ -78,8 +152,7 @@ pseudolocalize_char(const char16_t c)
}
}
static bool
is_possible_normal_placeholder_end(const char16_t c) {
static bool is_possible_normal_placeholder_end(const char16_t c) {
switch (c) {
case 's': return true;
case 'S': return true;
@@ -106,8 +179,7 @@ is_possible_normal_placeholder_end(const char16_t c) {
}
}
String16
pseudo_generate_expansion(const unsigned int length) {
static String16 pseudo_generate_expansion(const unsigned int length) {
String16 result = k_expansion_string;
const char16_t* s = result.string();
if (result.size() < length) {
@@ -127,18 +199,47 @@ pseudo_generate_expansion(const unsigned int length) {
return result;
}
// Returns true only for the three separators treated as word boundaries here:
// space, horizontal tab and newline.
static bool is_space(const char16_t c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}
// Opens a message: returns "[" for the outermost message and an empty string
// for nested ones, resets the per-message counters and bumps the nesting depth.
String16 PseudoMethodAccent::start() {
    const bool outermost = (mDepth == 0);

    mLength = 0;
    mWordCount = 0;
    ++mDepth;

    if (outermost) {
        return String16(String8("["));
    }
    return String16();
}
// Closes a message. If any characters were counted, appends a space and an
// expansion string sized to the full counted length (more than three words) or
// half of it (otherwise). The closing "]" is added only when the outermost
// message ends.
String16 PseudoMethodAccent::end() {
    String16 suffix;

    if (mLength != 0) {
        const size_t expansion = (mWordCount > 3) ? mLength : mLength / 2;
        suffix.append(String16(String8(" ")));
        suffix.append(pseudo_generate_expansion(expansion));
    }

    mLength = 0;
    mWordCount = 0;
    --mDepth;

    if (mDepth == 0) {
        suffix.append(String16(String8("]")));
    }
    return suffix;
}
/**
* Converts characters so they look like they've been localized.
*
* Note: This leaves escape sequences untouched so they can later be
* processed by ResTable::collectString in the normal way.
*/
String16
pseudolocalize_string(const String16& source)
String16 PseudoMethodAccent::text(const String16& source)
{
const char16_t* s = source.string();
String16 result;
const size_t I = source.size();
bool lastspace = true;
for (size_t i=0; i<I; i++) {
char16_t c = s[i];
if (c == '\\') {
@@ -170,23 +271,24 @@ pseudolocalize_string(const String16& source)
}
} else if (c == '%') {
// Placeholder syntax, no need to pseudolocalize
result += k_placeholder_open;
String16 chunk;
bool end = false;
result.append(&c, 1);
chunk.append(&c, 1);
while (!end && i < I) {
++i;
c = s[i];
result.append(&c, 1);
chunk.append(&c, 1);
if (is_possible_normal_placeholder_end(c)) {
end = true;
} else if (c == 't') {
++i;
c = s[i];
result.append(&c, 1);
chunk.append(&c, 1);
end = true;
}
}
result += k_placeholder_close;
// Treat chunk as a placeholder unless it ends with %.
result += ((c == '%') ? chunk : placeholder(chunk));
} else if (c == '<' || c == '&') {
// html syntax, no need to pseudolocalize
bool tag_closed = false;
@@ -234,35 +336,52 @@ pseudolocalize_string(const String16& source)
if (p != NULL) {
result += String16(p);
} else {
bool space = is_space(c);
if (lastspace && !space) {
mWordCount++;
}
lastspace = space;
result.append(&c, 1);
}
// Count only pseudolocalizable chars and delimiters
mLength++;
}
}
return result;
}
// Wraps a placeholder in the opening and closing placeholder markers so it
// stands out from the accented text around it.
String16 PseudoMethodAccent::placeholder(const String16& source) {
    String16 wrapped(k_placeholder_open);
    wrapped += source;
    wrapped += k_placeholder_close;
    return wrapped;
}
String16
pseudobidi_string(const String16& source)
String16 PseudoMethodBidi::text(const String16& source)
{
const char16_t* s = source.string();
String16 result;
result += k_rlm;
result += k_rlo;
bool lastspace = true;
bool space = true;
for (size_t i=0; i<source.size(); i++) {
char16_t c = s[i];
switch(c) {
case ' ': result += k_pdf;
result += k_rlm;
result.append(&c, 1);
result += k_rlm;
result += k_rlo;
break;
default: result.append(&c, 1);
break;
space = is_space(c);
if (lastspace && !space) {
// Word start
result += k_rlm + k_rlo;
} else if (!lastspace && space) {
// Word end
result += k_pdf + k_rlm;
}
lastspace = space;
result.append(&c, 1);
}
if (!lastspace) {
// End of last word
result += k_pdf + k_rlm;
}
result += k_pdf;
result += k_rlm;
return result;
}
// Wraps a placeholder in the same directionality-change sequence used for
// words: k_rlm + k_rlo before it, k_pdf + k_rlm after it.
String16 PseudoMethodBidi::placeholder(const String16& source) {
    String16 wrapped(k_rlm);
    wrapped += k_rlo;
    wrapped += source;
    wrapped += k_pdf;
    wrapped += k_rlm;
    return wrapped;
}

View File

@@ -1,18 +1,58 @@
#ifndef HOST_PSEUDOLOCALIZE_H
#define HOST_PSEUDOLOCALIZE_H
#include <base/macros.h>
#include "StringPool.h"
#include <string>
// Interface implemented by every pseudolocalization method.
class PseudoMethodImpl {
public:
    virtual ~PseudoMethodImpl() {}

    // Text emitted when a message starts; empty unless overridden.
    virtual String16 start() { return String16(); }

    // Text emitted when a message ends; empty unless overridden.
    virtual String16 end() { return String16(); }

    // Transforms a chunk of translatable text.
    virtual String16 text(const String16& text) = 0;

    // Transforms a chunk recognised as a placeholder.
    virtual String16 placeholder(const String16& text) = 0;
};
String16 pseudolocalize_string(const String16& source);
// Surrounds every word in the sentence with specific characters that make
// the word directionality RTL.
String16 pseudobidi_string(const String16& source);
// Generates an expansion string based on the specified length.
// The generated string will not be shorter than length, but it could be
// slightly longer.
String16 pseudo_generate_expansion(const unsigned int length);
// Pass-through method: text and placeholders are returned unchanged.
class PseudoMethodNone : public PseudoMethodImpl {
public:
    PseudoMethodNone() {}

    // `override` makes the compiler reject any signature drift from the base.
    String16 text(const String16& text) override { return text; }
    String16 placeholder(const String16& text) override { return text; }

private:
    DISALLOW_COPY_AND_ASSIGN(PseudoMethodNone);
};
// Bidi method: keeps no state of its own and inherits the empty start()/end()
// from the base class.
class PseudoMethodBidi : public PseudoMethodImpl {
public:
    // `override` makes the compiler reject any signature drift from the base.
    String16 text(const String16& text) override;
    String16 placeholder(const String16& text) override;
};
// Accent method: overrides all four hooks and keeps per-message counters.
class PseudoMethodAccent : public PseudoMethodImpl {
public:
    PseudoMethodAccent() : mDepth(0), mWordCount(0), mLength(0) {}

    // `override` makes the compiler reject any signature drift from the base.
    String16 start() override;
    String16 end() override;
    String16 text(const String16& text) override;
    String16 placeholder(const String16& text) override;

private:
    size_t mDepth;      // incremented by start(), decremented by end()
    size_t mWordCount;  // zeroed by start() and end()
    size_t mLength;     // zeroed by start() and end()
};
// Front end that owns one PseudoMethodImpl and forwards to it.
class Pseudolocalizer {
public:
    Pseudolocalizer(PseudolocalizationMethod m);
    ~Pseudolocalizer() { delete mImpl; }

    // The object owns mImpl through a raw pointer; a memberwise copy would make
    // two objects delete the same implementation, so copying is disabled.
    Pseudolocalizer(const Pseudolocalizer&) = delete;
    Pseudolocalizer& operator=(const Pseudolocalizer&) = delete;

    // Replaces the owned implementation with the one for method |m|.
    void setMethod(PseudolocalizationMethod m);

    String16 start() { return mImpl->start(); }
    String16 end() { return mImpl->end(); }
    String16 text(const String16& text);

private:
    PseudoMethodImpl *mImpl;
    size_t mLastDepth;
};
#endif // HOST_PSEUDOLOCALIZE_H

View File

@@ -0,0 +1,217 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <androidfw/ResourceTypes.h>
#include <utils/String8.h>
#include <gtest/gtest.h>
#include "Bundle.h"
#include "pseudolocalize.h"
using android::String8;
// The helpers below run a Pseudolocalizer over one or more text fragments and
// compare the concatenated output with the expected string.
static void simple_helper(const char* input, const char* expected, PseudolocalizationMethod method) {
Pseudolocalizer pseudo(method);
String16 result = pseudo.start() + pseudo.text(String16(String8(input))) + pseudo.end();
//std::cout << String8(result).string() << std::endl;
ASSERT_EQ(String8(expected), String8(result));
}
static void compound_helper(const char* in1, const char* in2, const char *in3,
const char* expected, PseudolocalizationMethod method) {
Pseudolocalizer pseudo(method);
String16 result = pseudo.start() + \
pseudo.text(String16(String8(in1))) + \
pseudo.text(String16(String8(in2))) + \
pseudo.text(String16(String8(in3))) + \
pseudo.end();
ASSERT_EQ(String8(expected), String8(result));
}
// With NO_PSEUDOLOCALIZATION the input must come back unchanged, whether it is
// passed as one fragment or split across several.
TEST(Pseudolocales, NoPseudolocalization) {
simple_helper("", "", NO_PSEUDOLOCALIZATION);
simple_helper("Hello, world", "Hello, world", NO_PSEUDOLOCALIZATION);
compound_helper("Hello,", " world", "",
"Hello, world", NO_PSEUDOLOCALIZATION);
}
// Accent method on plain text: output is bracketed, letters are accented, an
// expansion suffix is appended, and printf-style placeholders are wrapped in
// » « (the "%%" after "%1d" is expected to stay unwrapped).
TEST(Pseudolocales, PlaintextAccent) {
simple_helper("", "[]", PSEUDO_ACCENTED);
simple_helper("Hello, world",
"[Ĥéļļö, ŵöŕļð one two]", PSEUDO_ACCENTED);
simple_helper("Hello, %1d",
"[Ĥéļļö, »%1d« one two]", PSEUDO_ACCENTED);
simple_helper("Battery %1d%%",
"[βåţţéŕý »%1d«%% one two]", PSEUDO_ACCENTED);
compound_helper("", "", "", "[]", PSEUDO_ACCENTED);
compound_helper("Hello,", " world", "",
"[Ĥéļļö, ŵöŕļð one two]", PSEUDO_ACCENTED);
}
// Bidi method on plain text: every word is expected to be wrapped in a
// directionality-change sequence while the surrounding whitespace is left
// as-is; empty input yields empty output.
// NOTE(review): the two " word " cases below are identical as shown, and the
// compound case's fragments do not obviously concatenate to the expected
// spacing — runs of spaces may have been collapsed in this copy; confirm
// against the original file.
TEST(Pseudolocales, PlaintextBidi) {
simple_helper("", "", PSEUDO_BIDI);
simple_helper("word",
"\xe2\x80\x8f\xE2\x80\xaeword\xE2\x80\xac\xe2\x80\x8f",
PSEUDO_BIDI);
simple_helper(" word ",
" \xe2\x80\x8f\xE2\x80\xaeword\xE2\x80\xac\xe2\x80\x8f ",
PSEUDO_BIDI);
simple_helper(" word ",
" \xe2\x80\x8f\xE2\x80\xaeword\xE2\x80\xac\xe2\x80\x8f ",
PSEUDO_BIDI);
simple_helper("hello\n world\n",
"\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\n" \
" \xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\n",
PSEUDO_BIDI);
compound_helper("hello", "\n ", " world\n",
"\xe2\x80\x8f\xE2\x80\xaehello\xE2\x80\xac\xe2\x80\x8f\n" \
" \xe2\x80\x8f\xE2\x80\xaeworld\xE2\x80\xac\xe2\x80\x8f\n",
PSEUDO_BIDI);
}
// Accent method with simple ICU arguments: each "{...}" argument is expected
// to be wrapped in » « and left unaccented, both when the message arrives as
// one fragment and when it is split across fragments.
TEST(Pseudolocales, SimpleICU) {
// Single-fragment messages
simple_helper("{placeholder}", "[»{placeholder}«]", PSEUDO_ACCENTED);
simple_helper("{USER} is offline",
"[»{USER}« îš öƒƒļîñé one two]", PSEUDO_ACCENTED);
simple_helper("Copy from {path1} to {path2}",
"[Çöþý ƒŕöḿ »{path1}« ţö »{path2}« one two three]", PSEUDO_ACCENTED);
simple_helper("Today is {1,date} {1,time}",
"[Ţöðåý îš »{1,date}« »{1,time}« one two]", PSEUDO_ACCENTED);
// Multi-fragment messages
compound_helper("{USER}", " ", "is offline",
"[»{USER}« îš öƒƒļîñé one two]",
PSEUDO_ACCENTED);
compound_helper("Copy from ", "{path1}", " to {path2}",
"[Çöþý ƒŕöḿ »{path1}« ţö »{path2}« one two three]",
PSEUDO_ACCENTED);
}
// Bidi method with ICU syntax: a bare argument is wrapped as a whole, while in
// a plural only the message text inside each branch is wrapped and the
// selector syntax is copied through unchanged.
TEST(Pseudolocales, ICUBidi) {
// Single-fragment messages
simple_helper("{placeholder}",
"\xe2\x80\x8f\xE2\x80\xae{placeholder}\xE2\x80\xac\xe2\x80\x8f",
PSEUDO_BIDI);
simple_helper(
"{COUNT, plural, one {one} other {other}}",
"{COUNT, plural, " \
"one {\xe2\x80\x8f\xE2\x80\xaeone\xE2\x80\xac\xe2\x80\x8f} " \
"other {\xe2\x80\x8f\xE2\x80\xaeother\xE2\x80\xac\xe2\x80\x8f}}",
PSEUDO_BIDI
);
}
// Quote handling: a brace preceded by a single quote must not be treated as
// argument syntax, so the text around it is accented like ordinary text and
// the quotes themselves are kept in the output.
TEST(Pseudolocales, Escaping) {
// Single-fragment messages
simple_helper("'{USER'} is offline",
"['{ÛŠÉŔ'} îš öƒƒļîñé one two three]", PSEUDO_ACCENTED);
// Multi-fragment messages
compound_helper("'{USER}", " ", "''is offline",
"['{ÛŠÉŔ} ''îš öƒƒļîñé one two three]", PSEUDO_ACCENTED);
}
// Plural and select messages with the accent method: the selector syntax is
// expected to pass through untouched, each branch message is accented and gets
// its own expansion suffix, nested arguments are wrapped in » «, and only the
// outermost message is bracketed. The last case feeds the same plural message
// in three fragments.
TEST(Pseudolocales, PluralsAndSelects) {
simple_helper(
"{COUNT, plural, one {Delete a file} other {Delete {COUNT} files}}",
"[{COUNT, plural, one {Ðéļéţé å ƒîļé one two} " \
"other {Ðéļéţé »{COUNT}« ƒîļéš one two}}]",
PSEUDO_ACCENTED
);
simple_helper(
"Distance is {COUNT, plural, one {# mile} other {# miles}}",
"[Ðîšţåñçé îš {COUNT, plural, one {# ḿîļé one two} " \
"other {# ḿîļéš one two}}]",
PSEUDO_ACCENTED
);
simple_helper(
"{1, select, female {{1} added you} " \
"male {{1} added you} other {{1} added you}}",
"[{1, select, female {»{1}« åððéð ýöû one two} " \
"male {»{1}« åððéð ýöû one two} other {»{1}« åððéð ýöû one two}}]",
PSEUDO_ACCENTED
);
compound_helper(
"{COUNT, plural, one {Delete a file} " \
"other {Delete ", "{COUNT}", " files}}",
"[{COUNT, plural, one {Ðéļéţé å ƒîļé one two} " \
"other {Ðéļéţé »{COUNT}« ƒîļéš one two}}]",
PSEUDO_ACCENTED
);
}
// A select whose branches each contain a plural: only the innermost message
// text is expected to be accented and expanded; both layers of selector syntax
// are copied through and {person} arguments are wrapped in » «.
// NOTE(review): the expected string has "other {{num_circles" with no line
// break where the input has "other {" followed by "{num_circles" — the
// adjacent literals concatenate to the same text, so this is layout only.
TEST(Pseudolocales, NestedICU) {
simple_helper(
"{person, select, " \
"female {" \
"{num_circles, plural," \
"=0{{person} didn't add you to any of her circles.}" \
"=1{{person} added you to one of her circles.}" \
"other{{person} added you to her # circles.}}}" \
"male {" \
"{num_circles, plural," \
"=0{{person} didn't add you to any of his circles.}" \
"=1{{person} added you to one of his circles.}" \
"other{{person} added you to his # circles.}}}" \
"other {" \
"{num_circles, plural," \
"=0{{person} didn't add you to any of their circles.}" \
"=1{{person} added you to one of their circles.}" \
"other{{person} added you to their # circles.}}}}",
"[{person, select, " \
"female {" \
"{num_circles, plural," \
"=0{»{person}« ðîðñ'ţ åðð ýöû ţö åñý öƒ ĥéŕ çîŕçļéš." \
" one two three four five}" \
"=1{»{person}« åððéð ýöû ţö öñé öƒ ĥéŕ çîŕçļéš." \
" one two three four}" \
"other{»{person}« åððéð ýöû ţö ĥéŕ # çîŕçļéš." \
" one two three four}}}" \
"male {" \
"{num_circles, plural," \
"=0{»{person}« ðîðñ'ţ åðð ýöû ţö åñý öƒ ĥîš çîŕçļéš." \
" one two three four five}" \
"=1{»{person}« åððéð ýöû ţö öñé öƒ ĥîš çîŕçļéš." \
" one two three four}" \
"other{»{person}« åððéð ýöû ţö ĥîš # çîŕçļéš." \
" one two three four}}}" \
"other {{num_circles, plural," \
"=0{»{person}« ðîðñ'ţ åðð ýöû ţö åñý öƒ ţĥéîŕ çîŕçļéš." \
" one two three four five}" \
"=1{»{person}« åððéð ýöû ţö öñé öƒ ţĥéîŕ çîŕçļéš." \
" one two three four}" \
"other{»{person}« åððéð ýöû ţö ţĥéîŕ # çîŕçļéš." \
" one two three four}}}}]",
PSEUDO_ACCENTED
);
}
// Switching the method mid-stream: text processed before setMethod() stays
// accented, text processed afterwards passes through unchanged. start() and
// end() are never called here, so no brackets or expansion are expected.
TEST(Pseudolocales, RedefineMethod) {
Pseudolocalizer pseudo(PSEUDO_ACCENTED);
String16 result = pseudo.text(String16(String8("Hello, ")));
pseudo.setMethod(NO_PSEUDOLOCALIZATION);
result.append(pseudo.text(String16(String8("world!"))));
ASSERT_EQ(String8("Ĥéļļö, world!"), String8(result));
}