Merge "AAPT2: Fix pseudolocalization (again)" into oc-dev
This commit is contained in:
committed by
Android (Google) Code Review
commit
448727bdae
@@ -155,7 +155,10 @@ bool ResourceParser::FlattenXmlSubtree(
|
||||
xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string,
|
||||
std::vector<UntranslatableSection>* out_untranslatable_sections) {
|
||||
// Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply.
|
||||
std::vector<Span> span_stack;
|
||||
// The stack elements refer to the indices in out_style_string->spans.
|
||||
// By first adding to the out_style_string->spans vector, and then using the stack to refer
|
||||
// to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>.
|
||||
std::vector<size_t> span_stack;
|
||||
|
||||
// Clear the output variables.
|
||||
out_raw_string->clear();
|
||||
@@ -192,7 +195,9 @@ bool ResourceParser::FlattenXmlSubtree(
|
||||
return false;
|
||||
}
|
||||
|
||||
span_stack.push_back(Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
|
||||
out_style_string->spans.push_back(
|
||||
Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
|
||||
span_stack.push_back(out_style_string->spans.size() - 1);
|
||||
} else if (parser->element_namespace() == sXliffNamespaceUri) {
|
||||
if (parser->element_name() == "g") {
|
||||
if (untranslatable_start_depth) {
|
||||
@@ -233,9 +238,8 @@ bool ResourceParser::FlattenXmlSubtree(
|
||||
if (parser->element_namespace().empty()) {
|
||||
// This is an HTML tag which we encode as a span. Update the span
|
||||
// stack and pop the top entry.
|
||||
Span& top_span = span_stack.back();
|
||||
Span& top_span = out_style_string->spans[span_stack.back()];
|
||||
top_span.last_char = builder.Utf16Len() - 1;
|
||||
out_style_string->spans.push_back(std::move(top_span));
|
||||
span_stack.pop_back();
|
||||
} else if (untranslatable_start_depth == make_value(depth)) {
|
||||
// This is the end of an untranslatable section. Use UTF8 indices/lengths.
|
||||
|
||||
@@ -101,20 +101,24 @@ TEST_F(ResourceParserTest, ParseStyledString) {
|
||||
// Use a surrogate pair unicode point so that we can verify that the span
|
||||
// indices use UTF-16 length and not UTF-8 length.
|
||||
std::string input =
|
||||
"<string name=\"foo\">This is my aunt\u2019s <b>string</b></string>";
|
||||
"<string name=\"foo\">This is my aunt\u2019s <b>fickle <small>string</small></b></string>";
|
||||
ASSERT_TRUE(TestParse(input));
|
||||
|
||||
StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
|
||||
ASSERT_NE(nullptr, str);
|
||||
|
||||
const std::string expected_str = "This is my aunt\u2019s string";
|
||||
const std::string expected_str = "This is my aunt\u2019s fickle string";
|
||||
EXPECT_EQ(expected_str, *str->value->str);
|
||||
EXPECT_EQ(1u, str->value->spans.size());
|
||||
EXPECT_EQ(2u, str->value->spans.size());
|
||||
EXPECT_TRUE(str->untranslatable_sections.empty());
|
||||
|
||||
EXPECT_EQ(std::string("b"), *str->value->spans[0].name);
|
||||
EXPECT_EQ(17u, str->value->spans[0].first_char);
|
||||
EXPECT_EQ(23u, str->value->spans[0].last_char);
|
||||
EXPECT_EQ(30u, str->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("small"), *str->value->spans[1].name);
|
||||
EXPECT_EQ(24u, str->value->spans[1].first_char);
|
||||
EXPECT_EQ(30u, str->value->spans[1].last_char);
|
||||
}
|
||||
|
||||
TEST_F(ResourceParserTest, ParseStringWithWhitespace) {
|
||||
|
||||
@@ -22,136 +22,194 @@
|
||||
#include "ResourceValues.h"
|
||||
#include "ValueVisitor.h"
|
||||
#include "compile/Pseudolocalizer.h"
|
||||
#include "util/Util.h"
|
||||
|
||||
using android::StringPiece;
|
||||
using android::StringPiece16;
|
||||
|
||||
namespace aapt {
|
||||
|
||||
std::unique_ptr<StyledString> PseudolocalizeStyledString(
|
||||
StyledString* string, Pseudolocalizer::Method method, StringPool* pool) {
|
||||
// The struct that represents both Span objects and UntranslatableSections.
|
||||
struct UnifiedSpan {
|
||||
// Only present for Span objects. If not present, this was an UntranslatableSection.
|
||||
Maybe<std::string> tag;
|
||||
|
||||
// The UTF-16 index into the string where this span starts.
|
||||
uint32_t first_char;
|
||||
|
||||
// The UTF-16 index into the string where this span ends, inclusive.
|
||||
uint32_t last_char;
|
||||
};
|
||||
|
||||
inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
|
||||
if (left.first_char < right.first_char) {
|
||||
return true;
|
||||
} else if (left.first_char > right.first_char) {
|
||||
return false;
|
||||
} else if (left.last_char < right.last_char) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
|
||||
return UnifiedSpan{*span.name, span.first_char, span.last_char};
|
||||
}
|
||||
|
||||
inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
|
||||
return UnifiedSpan{
|
||||
{}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
|
||||
}
|
||||
|
||||
// Merges the Span and UntranslatableSections of this StyledString into a single vector of
|
||||
// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
|
||||
static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
|
||||
// Ensure the Spans are sorted and converted.
|
||||
std::vector<UnifiedSpan> sorted_spans;
|
||||
sorted_spans.reserve(string.value->spans.size());
|
||||
std::transform(string.value->spans.begin(), string.value->spans.end(),
|
||||
std::back_inserter(sorted_spans), SpanToUnifiedSpan);
|
||||
|
||||
// Stable sort to ensure tag sequences like "<b><i>" are preserved.
|
||||
std::stable_sort(sorted_spans.begin(), sorted_spans.end());
|
||||
|
||||
// Ensure the UntranslatableSections are sorted and converted.
|
||||
std::vector<UnifiedSpan> sorted_untranslatable_sections;
|
||||
sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
|
||||
std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
|
||||
std::back_inserter(sorted_untranslatable_sections),
|
||||
UntranslatableSectionToUnifiedSpan);
|
||||
std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
|
||||
|
||||
std::vector<UnifiedSpan> merged_spans;
|
||||
merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
|
||||
auto span_iter = sorted_spans.begin();
|
||||
auto untranslatable_iter = sorted_untranslatable_sections.begin();
|
||||
while (span_iter != sorted_spans.end() &&
|
||||
untranslatable_iter != sorted_untranslatable_sections.end()) {
|
||||
if (*span_iter < *untranslatable_iter) {
|
||||
merged_spans.push_back(std::move(*span_iter));
|
||||
++span_iter;
|
||||
} else {
|
||||
merged_spans.push_back(std::move(*untranslatable_iter));
|
||||
++untranslatable_iter;
|
||||
}
|
||||
}
|
||||
|
||||
while (span_iter != sorted_spans.end()) {
|
||||
merged_spans.push_back(std::move(*span_iter));
|
||||
++span_iter;
|
||||
}
|
||||
|
||||
while (untranslatable_iter != sorted_untranslatable_sections.end()) {
|
||||
merged_spans.push_back(std::move(*untranslatable_iter));
|
||||
++untranslatable_iter;
|
||||
}
|
||||
return merged_spans;
|
||||
}
|
||||
|
||||
std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
|
||||
Pseudolocalizer::Method method,
|
||||
StringPool* pool) {
|
||||
Pseudolocalizer localizer(method);
|
||||
|
||||
const StringPiece original_text = *string->value->str;
|
||||
// Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
|
||||
// This will effectively subdivide the string into multiple sections that can be individually
|
||||
// pseudolocalized, while keeping the span indices synchronized.
|
||||
std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
|
||||
|
||||
// All Span indices are UTF-16 based, according to the resources.arsc format expected by the
|
||||
// runtime. So we will do all our processing in UTF-16, then convert back.
|
||||
const std::u16string text16 = util::Utf8ToUtf16(*string->value->str);
|
||||
|
||||
// Convenient wrapper around the text that allows us to work with StringPieces.
|
||||
const StringPiece16 text(text16);
|
||||
|
||||
// The new string.
|
||||
std::string new_string = localizer.Start();
|
||||
|
||||
// The stack that keeps track of what nested Span we're in.
|
||||
std::vector<size_t> span_stack;
|
||||
|
||||
// The current position in the original text.
|
||||
uint32_t cursor = 0u;
|
||||
|
||||
// The current position in the new text.
|
||||
uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
|
||||
new_string.size(), false);
|
||||
|
||||
// We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
|
||||
bool translatable = true;
|
||||
size_t span_idx = 0u;
|
||||
while (span_idx < merged_spans.size() || !span_stack.empty()) {
|
||||
UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
|
||||
UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
|
||||
|
||||
if (span != nullptr) {
|
||||
if (parent_span == nullptr || parent_span->last_char > span->first_char) {
|
||||
// There is no parent, or this span is the child of the parent.
|
||||
// Pseudolocalize all the text until this span.
|
||||
const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
|
||||
cursor += substr.size();
|
||||
|
||||
// Pseudolocalize the substring.
|
||||
std::string new_substr = util::Utf16ToUtf8(substr);
|
||||
if (translatable) {
|
||||
new_substr = localizer.Text(new_substr);
|
||||
}
|
||||
new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
|
||||
new_substr.size(), false);
|
||||
new_string += new_substr;
|
||||
|
||||
// Rewrite the first_char.
|
||||
span->first_char = new_cursor;
|
||||
if (!span->tag) {
|
||||
// An untranslatable section has begun!
|
||||
translatable = false;
|
||||
}
|
||||
span_stack.push_back(span_idx);
|
||||
++span_idx;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (parent_span != nullptr) {
|
||||
// There is a parent, and either this span is not a child of it, or there are no more spans.
|
||||
// Pop this off the stack.
|
||||
const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
|
||||
cursor += substr.size();
|
||||
|
||||
// Pseudolocalize the substring.
|
||||
std::string new_substr = util::Utf16ToUtf8(substr);
|
||||
if (translatable) {
|
||||
new_substr = localizer.Text(new_substr);
|
||||
}
|
||||
new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
|
||||
new_substr.size(), false);
|
||||
new_string += new_substr;
|
||||
|
||||
parent_span->last_char = new_cursor - 1;
|
||||
if (parent_span->tag) {
|
||||
// An end to an untranslatable section.
|
||||
translatable = true;
|
||||
}
|
||||
span_stack.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
// Finish the pseudolocalization at the end of the string.
|
||||
new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
|
||||
new_string += localizer.End();
|
||||
|
||||
StyleString localized;
|
||||
localized.str = std::move(new_string);
|
||||
|
||||
// Copy the spans. We will update their offsets when we localize.
|
||||
localized.spans.reserve(string->value->spans.size());
|
||||
for (const StringPool::Span& span : string->value->spans) {
|
||||
localized.spans.push_back(
|
||||
Span{*span.name, span.first_char, span.last_char});
|
||||
}
|
||||
|
||||
// The ranges are all represented with a single value. This is the start of
|
||||
// one range and end of another.
|
||||
struct Range {
|
||||
size_t start;
|
||||
|
||||
// If set to true, toggles the state of translatability.
|
||||
bool toggle_translatability;
|
||||
|
||||
// Once the new string is localized, these are the pointers to the spans to adjust.
|
||||
// Since this struct represents the start of one range and end of another,
|
||||
// we have the two pointers respectively.
|
||||
uint32_t* update_start;
|
||||
uint32_t* update_end;
|
||||
};
|
||||
|
||||
auto cmp = [](const Range& r, size_t index) -> bool {
|
||||
return r.start < index;
|
||||
};
|
||||
|
||||
// Construct the ranges. The ranges are represented like so: [0, 2, 5, 7]
|
||||
// The ranges are the spaces in between. In this example, with a total string
|
||||
// length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9]
|
||||
//
|
||||
std::vector<Range> ranges;
|
||||
ranges.push_back(Range{0, false, nullptr, nullptr});
|
||||
ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr});
|
||||
for (size_t i = 0; i < string->value->spans.size(); i++) {
|
||||
const StringPool::Span& span = string->value->spans[i];
|
||||
|
||||
// Insert or update the Range marker for the start of this span.
|
||||
auto iter =
|
||||
std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp);
|
||||
if (iter != ranges.end() && iter->start == span.first_char) {
|
||||
iter->update_start = &localized.spans[i].first_char;
|
||||
} else {
|
||||
ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr});
|
||||
}
|
||||
|
||||
// Insert or update the Range marker for the end of this span.
|
||||
iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp);
|
||||
if (iter != ranges.end() && iter->start == span.last_char) {
|
||||
iter->update_end = &localized.spans[i].last_char;
|
||||
} else {
|
||||
ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char});
|
||||
// Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
|
||||
for (UnifiedSpan& span : merged_spans) {
|
||||
if (span.tag) {
|
||||
localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
|
||||
}
|
||||
}
|
||||
|
||||
// Parts of the string may be untranslatable. Merge those ranges
|
||||
// in as well, so that we have continuous sections of text to
|
||||
// feed into the pseudolocalizer.
|
||||
// We do this by marking the beginning of a range as either toggling
|
||||
// the translatability state or not.
|
||||
for (const UntranslatableSection& section : string->untranslatable_sections) {
|
||||
auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp);
|
||||
if (iter != ranges.end() && iter->start == section.start) {
|
||||
// An existing span starts (or ends) here. We just need to mark that
|
||||
// the translatability should toggle here. If translatability was
|
||||
// already being toggled, then that means we have two adjacent ranges of untranslatable
|
||||
// text, so remove the toggle and only toggle at the end of this range,
|
||||
// effectively merging these ranges.
|
||||
iter->toggle_translatability = !iter->toggle_translatability;
|
||||
} else {
|
||||
// Insert a new range that specifies to toggle the translatability.
|
||||
iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr});
|
||||
}
|
||||
|
||||
// Update/create an end to the untranslatable section.
|
||||
iter = std::lower_bound(iter, ranges.end(), section.end, cmp);
|
||||
if (iter != ranges.end() && iter->start == section.end) {
|
||||
iter->toggle_translatability = true;
|
||||
} else {
|
||||
iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
|
||||
localized.str += localizer.Start();
|
||||
|
||||
// Iterate over the ranges and localize each section.
|
||||
// The text starts as translatable, and each time a range has toggle_translatability
|
||||
// set to true, we toggle whether to translate or not.
|
||||
// This assumes no untranslatable ranges overlap.
|
||||
bool translatable = true;
|
||||
for (size_t i = 0; i < ranges.size(); i++) {
|
||||
const size_t start = ranges[i].start;
|
||||
size_t len = original_text.size() - start;
|
||||
if (i + 1 < ranges.size()) {
|
||||
len = ranges[i + 1].start - start;
|
||||
}
|
||||
|
||||
if (ranges[i].update_start) {
|
||||
*ranges[i].update_start = localized.str.size();
|
||||
}
|
||||
|
||||
if (ranges[i].update_end) {
|
||||
*ranges[i].update_end = localized.str.size();
|
||||
}
|
||||
|
||||
if (ranges[i].toggle_translatability) {
|
||||
translatable = !translatable;
|
||||
}
|
||||
|
||||
if (translatable) {
|
||||
localized.str += localizer.Text(original_text.substr(start, len));
|
||||
} else {
|
||||
localized.str += original_text.substr(start, len);
|
||||
}
|
||||
}
|
||||
|
||||
localized.str += localizer.End();
|
||||
|
||||
return util::make_unique<StyledString>(pool->MakeRef(localized));
|
||||
}
|
||||
|
||||
@@ -175,8 +233,7 @@ class Visitor : public RawValueVisitor {
|
||||
if (sub_visitor.value) {
|
||||
localized->values[i] = std::move(sub_visitor.item);
|
||||
} else {
|
||||
localized->values[i] =
|
||||
std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
|
||||
localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -210,8 +267,7 @@ class Visitor : public RawValueVisitor {
|
||||
}
|
||||
result += localizer_.End();
|
||||
|
||||
std::unique_ptr<String> localized =
|
||||
util::make_unique<String>(pool_->MakeRef(result));
|
||||
std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
|
||||
localized->SetSource(string->GetSource());
|
||||
localized->SetWeak(true);
|
||||
item = std::move(localized);
|
||||
@@ -282,14 +338,10 @@ void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A value is pseudolocalizable if it does not define a locale (or is the
|
||||
* default locale)
|
||||
* and is translatable.
|
||||
*/
|
||||
// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
|
||||
// translatable.
|
||||
static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
|
||||
const int diff =
|
||||
config_value->config.diff(ConfigDescription::DefaultConfig());
|
||||
const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
|
||||
if (diff & ConfigDescription::CONFIG_LOCALE) {
|
||||
return false;
|
||||
}
|
||||
@@ -298,19 +350,16 @@ static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
|
||||
|
||||
} // namespace
|
||||
|
||||
bool PseudolocaleGenerator::Consume(IAaptContext* context,
|
||||
ResourceTable* table) {
|
||||
bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
|
||||
for (auto& package : table->packages) {
|
||||
for (auto& type : package->types) {
|
||||
for (auto& entry : type->entries) {
|
||||
std::vector<ResourceConfigValue*> values =
|
||||
entry->FindValuesIf(IsPseudolocalizable);
|
||||
|
||||
std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
|
||||
for (ResourceConfigValue* value : values) {
|
||||
PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value,
|
||||
&table->string_pool, entry.get());
|
||||
PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value,
|
||||
&table->string_pool, entry.get());
|
||||
PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
|
||||
entry.get());
|
||||
PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
|
||||
entry.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) {
|
||||
StringPool pool;
|
||||
StyleString original_style;
|
||||
original_style.str = "Hello world!";
|
||||
original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}};
|
||||
original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}};
|
||||
|
||||
std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
|
||||
util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
|
||||
@@ -34,22 +34,19 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) {
|
||||
EXPECT_EQ(original_style.str, *new_string->value->str);
|
||||
ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size());
|
||||
|
||||
EXPECT_EQ(std::string("He").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::string("Hel").size(), new_string->value->spans[0].last_char);
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
|
||||
EXPECT_EQ(std::string("i"), *new_string->value->spans[0].name);
|
||||
EXPECT_EQ(std::u16string(u"H").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::u16string(u"Hello worl").size(), new_string->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("Hello ").size(),
|
||||
new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::string("Hello w").size(),
|
||||
new_string->value->spans[1].last_char);
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[1].name);
|
||||
EXPECT_EQ(std::u16string(u"He").size(), new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::u16string(u"Hel").size(), new_string->value->spans[1].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("H").size(), new_string->value->spans[2].first_char);
|
||||
EXPECT_EQ(std::string("Hello worl").size(),
|
||||
new_string->value->spans[2].last_char);
|
||||
EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name);
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[2].name);
|
||||
EXPECT_EQ(std::u16string(u"Hello ").size(), new_string->value->spans[2].first_char);
|
||||
EXPECT_EQ(std::u16string(u"Hello w").size(), new_string->value->spans[2].last_char);
|
||||
|
||||
original_style.spans.push_back(Span{"em", 0, 11u});
|
||||
original_style.spans.insert(original_style.spans.begin(), Span{"em", 0, 11u});
|
||||
|
||||
new_string = PseudolocalizeStyledString(
|
||||
util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
|
||||
@@ -58,23 +55,128 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) {
|
||||
EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð¡ one two]"), *new_string->value->str);
|
||||
ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size());
|
||||
|
||||
EXPECT_EQ(std::string("[Ĥé").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::string("[Ĥéļ").size(), new_string->value->spans[0].last_char);
|
||||
EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļð").size(), new_string->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("[Ĥéļļö ").size(),
|
||||
EXPECT_EQ(std::u16string(u"[Ĥ").size(), new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļ").size(), new_string->value->spans[1].last_char);
|
||||
|
||||
EXPECT_EQ(std::u16string(u"[Ĥé").size(), new_string->value->spans[2].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ĥéļ").size(), new_string->value->spans[2].last_char);
|
||||
|
||||
EXPECT_EQ(std::u16string(u"[Ĥéļļö ").size(), new_string->value->spans[3].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵ").size(), new_string->value->spans[3].last_char);
|
||||
}
|
||||
|
||||
TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentNestedTags) {
|
||||
StringPool pool;
|
||||
StyleString original_style;
|
||||
original_style.str = "bold";
|
||||
original_style.spans = {Span{"b", 0, 3}, Span{"i", 0, 3}};
|
||||
|
||||
std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
|
||||
util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
|
||||
Pseudolocalizer::Method::kAccent, &pool);
|
||||
ASSERT_NE(nullptr, new_string);
|
||||
ASSERT_EQ(2u, new_string->value->spans.size());
|
||||
EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str);
|
||||
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
|
||||
EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name);
|
||||
EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char);
|
||||
}
|
||||
|
||||
TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentTagsUnsorted) {
|
||||
StringPool pool;
|
||||
StyleString original_style;
|
||||
original_style.str = "bold";
|
||||
original_style.spans = {Span{"i", 2, 3}, Span{"b", 0, 1}};
|
||||
|
||||
std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
|
||||
util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
|
||||
Pseudolocalizer::Method::kAccent, &pool);
|
||||
ASSERT_NE(nullptr, new_string);
|
||||
ASSERT_EQ(2u, new_string->value->spans.size());
|
||||
EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str);
|
||||
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
|
||||
EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[ɓ").size(), new_string->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name);
|
||||
EXPECT_EQ(std::u16string(u"[ɓö").size(), new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char);
|
||||
}
|
||||
|
||||
TEST(PseudolocaleGeneratorTest, PseudolocalizeNestedAndAdjacentTags) {
|
||||
StringPool pool;
|
||||
StyleString original_style;
|
||||
original_style.str = "This sentence is not what you think it is at all.";
|
||||
original_style.spans = {Span{"b", 16u, 19u}, Span{"em", 29u, 47u}, Span{"i", 38u, 40u},
|
||||
Span{"b", 44u, 47u}};
|
||||
|
||||
std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
|
||||
util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
|
||||
Pseudolocalizer::Method::kAccent, &pool);
|
||||
ASSERT_NE(nullptr, new_string);
|
||||
ASSERT_EQ(4u, new_string->value->spans.size());
|
||||
EXPECT_EQ(std::string(
|
||||
"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļļ. one two three four five six]"),
|
||||
*new_string->value->str);
|
||||
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñö").size(), new_string->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("em"), *new_string->value->spans[1].name);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû").size(),
|
||||
new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::string("[Ĥéļļö ŵ").size(),
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(),
|
||||
new_string->value->spans[1].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("[Ĥ").size(), new_string->value->spans[2].first_char);
|
||||
EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļ").size(),
|
||||
EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ").size(),
|
||||
new_string->value->spans[2].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ î").size(),
|
||||
new_string->value->spans[2].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("[").size(), new_string->value->spans[3].first_char);
|
||||
EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð").size(),
|
||||
EXPECT_EQ(std::string("b"), *new_string->value->spans[3].name);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ").size(),
|
||||
new_string->value->spans[3].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(),
|
||||
new_string->value->spans[3].last_char);
|
||||
}
|
||||
|
||||
TEST(PseudolocaleGeneratorTest, PseudolocalizePartsOfString) {
|
||||
StringPool pool;
|
||||
StyleString original_style;
|
||||
original_style.str = "This should NOT be pseudolocalized.";
|
||||
original_style.spans = {Span{"em", 4u, 14u}, Span{"i", 18u, 33u}};
|
||||
std::unique_ptr<StyledString> original_string =
|
||||
util::make_unique<StyledString>(pool.MakeRef(original_style));
|
||||
original_string->untranslatable_sections = {UntranslatableSection{11u, 15u}};
|
||||
|
||||
std::unique_ptr<StyledString> new_string =
|
||||
PseudolocalizeStyledString(original_string.get(), Pseudolocalizer::Method::kAccent, &pool);
|
||||
ASSERT_NE(nullptr, new_string);
|
||||
ASSERT_EQ(2u, new_string->value->spans.size());
|
||||
EXPECT_EQ(std::string("[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžéð. one two three four]"),
|
||||
*new_string->value->str);
|
||||
|
||||
EXPECT_EQ(std::string("em"), *new_string->value->spans[0].name);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš").size(), new_string->value->spans[0].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NO").size(), new_string->value->spans[0].last_char);
|
||||
|
||||
EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé").size(), new_string->value->spans[1].first_char);
|
||||
EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžé").size(),
|
||||
new_string->value->spans[1].last_char);
|
||||
}
|
||||
|
||||
TEST(PseudolocaleGeneratorTest, PseudolocalizeOnlyDefaultConfigs) {
|
||||
std::unique_ptr<ResourceTable> table =
|
||||
test::ResourceTableBuilder()
|
||||
@@ -138,7 +240,7 @@ TEST(PseudolocaleGeneratorTest, RespectUntranslateableSections) {
|
||||
{
|
||||
StyleString original_style;
|
||||
original_style.str = "Hello world!";
|
||||
original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}};
|
||||
original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}};
|
||||
|
||||
auto styled_string =
|
||||
util::make_unique<StyledString>(table->string_pool.MakeRef(original_style));
|
||||
|
||||
Reference in New Issue
Block a user