|  | #include "pseudolocalize.h" | 
|  |  | 
|  | using namespace std; | 
|  |  | 
|  | // String basis to generate expansion | 
|  | static const String16 k_expansion_string = String16("one two three " | 
|  | "four five six seven eight nine ten eleven twelve thirteen " | 
|  | "fourteen fiveteen sixteen seventeen nineteen twenty"); | 
|  |  | 
|  | // Special unicode characters to override directionality of the words | 
|  | static const String16 k_rlm = String16("\xe2\x80\x8f"); | 
|  | static const String16 k_rlo = String16("\xE2\x80\xae"); | 
|  | static const String16 k_pdf = String16("\xE2\x80\xac"); | 
|  |  | 
|  | // Placeholder marks | 
|  | static const String16 k_placeholder_open = String16("\xc2\xbb"); | 
|  | static const String16 k_placeholder_close = String16("\xc2\xab"); | 
|  |  | 
|  | static const char16_t k_arg_start = '{'; | 
|  | static const char16_t k_arg_end = '}'; | 
|  |  | 
|  | Pseudolocalizer::Pseudolocalizer(PseudolocalizationMethod m) | 
|  | : mImpl(nullptr), mLastDepth(0) { | 
|  | setMethod(m); | 
|  | } | 
|  |  | 
|  | void Pseudolocalizer::setMethod(PseudolocalizationMethod m) { | 
|  | if (mImpl) { | 
|  | delete mImpl; | 
|  | } | 
|  | if (m == PSEUDO_ACCENTED) { | 
|  | mImpl = new PseudoMethodAccent(); | 
|  | } else if (m == PSEUDO_BIDI) { | 
|  | mImpl = new PseudoMethodBidi(); | 
|  | } else { | 
|  | mImpl = new PseudoMethodNone(); | 
|  | } | 
|  | } | 
|  |  | 
|  | String16 Pseudolocalizer::text(const String16& text) { | 
|  | String16 out; | 
|  | size_t depth = mLastDepth; | 
|  | size_t lastpos, pos; | 
|  | const size_t length= text.size(); | 
|  | const char16_t* str = text.string(); | 
|  | bool escaped = false; | 
|  | for (lastpos = pos = 0; pos < length; pos++) { | 
|  | char16_t c = str[pos]; | 
|  | if (escaped) { | 
|  | escaped = false; | 
|  | continue; | 
|  | } | 
|  | if (c == '\'') { | 
|  | escaped = true; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (c == k_arg_start) { | 
|  | depth++; | 
|  | } else if (c == k_arg_end && depth) { | 
|  | depth--; | 
|  | } | 
|  |  | 
|  | if (mLastDepth != depth || pos == length - 1) { | 
|  | bool pseudo = ((mLastDepth % 2) == 0); | 
|  | size_t nextpos = pos; | 
|  | if (!pseudo || depth == mLastDepth) { | 
|  | nextpos++; | 
|  | } | 
|  | size_t size = nextpos - lastpos; | 
|  | if (size) { | 
|  | String16 chunk = String16(text, size, lastpos); | 
|  | if (pseudo) { | 
|  | chunk = mImpl->text(chunk); | 
|  | } else if (str[lastpos] == k_arg_start && | 
|  | str[nextpos - 1] == k_arg_end) { | 
|  | chunk = mImpl->placeholder(chunk); | 
|  | } | 
|  | out.append(chunk); | 
|  | } | 
|  | if (pseudo && depth < mLastDepth) { // End of message | 
|  | out.append(mImpl->end()); | 
|  | } else if (!pseudo && depth > mLastDepth) { // Start of message | 
|  | out.append(mImpl->start()); | 
|  | } | 
|  | lastpos = nextpos; | 
|  | mLastDepth = depth; | 
|  | } | 
|  | } | 
|  | return out; | 
|  | } | 
|  |  | 
/**
 * Looks up the replacement for a character.
 *
 * @param c the character to replace
 * @return a NUL-terminated byte string holding the replacement, or nullptr
 *         when the character has no replacement. Replacements exist for
 *         'a'..'z', for 'A'..'Z' except 'F', and for '!', '?' and '$'.
 */
static const char*
pseudolocalize_char(const char16_t c)
{
    // Replacements for 'a'..'z', indexed by (c - 'a').
    static const char* const kLower[26] = {
        "\xc3\xa5",     // a
        "\xc9\x93",     // b
        "\xc3\xa7",     // c
        "\xc3\xb0",     // d
        "\xc3\xa9",     // e
        "\xc6\x92",     // f
        "\xc4\x9d",     // g
        "\xc4\xa5",     // h
        "\xc3\xae",     // i
        "\xc4\xb5",     // j
        "\xc4\xb7",     // k
        "\xc4\xbc",     // l
        "\xe1\xb8\xbf", // m
        "\xc3\xb1",     // n
        "\xc3\xb6",     // o
        "\xc3\xbe",     // p
        "\x51",         // q
        "\xc5\x95",     // r
        "\xc5\xa1",     // s
        "\xc5\xa3",     // t
        "\xc3\xbb",     // u
        "\x56",         // v
        "\xc5\xb5",     // w
        "\xd1\x85",     // x
        "\xc3\xbd",     // y
        "\xc5\xbe",     // z
    };
    // Replacements for 'A'..'Z', indexed by (c - 'A'); 'F' has none.
    static const char* const kUpper[26] = {
        "\xc3\x85",     // A
        "\xce\xb2",     // B
        "\xc3\x87",     // C
        "\xc3\x90",     // D
        "\xc3\x89",     // E
        nullptr,        // F
        "\xc4\x9c",     // G
        "\xc4\xa4",     // H
        "\xc3\x8e",     // I
        "\xc4\xb4",     // J
        "\xc4\xb6",     // K
        "\xc4\xbb",     // L
        "\xe1\xb8\xbe", // M
        "\xc3\x91",     // N
        "\xc3\x96",     // O
        "\xc3\x9e",     // P
        "\x71",         // Q
        "\xc5\x94",     // R
        "\xc5\xa0",     // S
        "\xc5\xa2",     // T
        "\xc3\x9b",     // U
        "\xce\xbd",     // V
        "\xc5\xb4",     // W
        "\xc3\x97",     // X
        "\xc3\x9d",     // Y
        "\xc5\xbd",     // Z
    };

    if (c >= 'a' && c <= 'z') {
        return kLower[c - 'a'];
    }
    if (c >= 'A' && c <= 'Z') {
        return kUpper[c - 'A'];
    }
    if (c == '!') {
        return "\xc2\xa1";
    }
    if (c == '?') {
        return "\xc2\xbf";
    }
    if (c == '$') {
        return "\xe2\x82\xac";
    }
    return nullptr;
}
|  |  | 
/**
 * Tells whether a character is one of the characters that terminate a
 * "normal" '%' placeholder: s S c C d o x X f e E g G a A b B h H % n.
 */
static bool is_possible_normal_placeholder_end(const char16_t c) {
    static const char kEndings[] = "sScCdoxXfeEgGaAbBhH%n";
    for (const char* p = kEndings; *p != '\0'; ++p) {
        if (c == static_cast<char16_t>(*p)) {
            return true;
        }
    }
    return false;
}
|  |  | 
|  | static String16 pseudo_generate_expansion(const unsigned int length) { | 
|  | String16 result = k_expansion_string; | 
|  | const char16_t* s = result.string(); | 
|  | if (result.size() < length) { | 
|  | result += String16(" "); | 
|  | result += pseudo_generate_expansion(length - result.size()); | 
|  | } else { | 
|  | int ext = 0; | 
|  | // Should contain only whole words, so looking for a space | 
|  | for (unsigned int i = length + 1; i < result.size(); ++i) { | 
|  | ++ext; | 
|  | if (s[i] == ' ') { | 
|  | break; | 
|  | } | 
|  | } | 
|  | // Just keep the first length + ext characters | 
|  | result = String16(result, length + ext); | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
/** Returns true for a space, a tab or a line feed; false for anything else. */
static bool is_space(const char16_t c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}
|  |  | 
|  | String16 PseudoMethodAccent::start() { | 
|  | String16 result; | 
|  | if (mDepth == 0) { | 
|  | result = String16(String8("[")); | 
|  | } | 
|  | mWordCount = mLength = 0; | 
|  | mDepth++; | 
|  | return result; | 
|  | } | 
|  |  | 
|  | String16 PseudoMethodAccent::end() { | 
|  | String16 result; | 
|  | if (mLength) { | 
|  | result.append(String16(String8(" "))); | 
|  | result.append(pseudo_generate_expansion( | 
|  | mWordCount > 3 ? mLength : mLength / 2)); | 
|  | } | 
|  | mWordCount = mLength = 0; | 
|  | mDepth--; | 
|  | if (mDepth == 0) { | 
|  | result.append(String16(String8("]"))); | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Converts characters so they look like they've been localized. | 
|  | * | 
|  | * Note: This leaves escape sequences untouched so they can later be | 
|  | * processed by ResTable::collectString in the normal way. | 
|  | */ | 
|  | String16 PseudoMethodAccent::text(const String16& source) | 
|  | { | 
|  | const char16_t* s = source.string(); | 
|  | String16 result; | 
|  | const size_t I = source.size(); | 
|  | bool lastspace = true; | 
|  | for (size_t i=0; i<I; i++) { | 
|  | char16_t c = s[i]; | 
|  | if (c == '\\') { | 
|  | // Escape syntax, no need to pseudolocalize | 
|  | if (i<I-1) { | 
|  | result += String16("\\"); | 
|  | i++; | 
|  | c = s[i]; | 
|  | switch (c) { | 
|  | case 'u': | 
|  | // this one takes up 5 chars | 
|  | result += String16(s+i, 5); | 
|  | i += 4; | 
|  | break; | 
|  | case 't': | 
|  | case 'n': | 
|  | case '#': | 
|  | case '@': | 
|  | case '?': | 
|  | case '"': | 
|  | case '\'': | 
|  | case '\\': | 
|  | default: | 
|  | result.append(&c, 1); | 
|  | break; | 
|  | } | 
|  | } else { | 
|  | result.append(&c, 1); | 
|  | } | 
|  | } else if (c == '%') { | 
|  | // Placeholder syntax, no need to pseudolocalize | 
|  | String16 chunk; | 
|  | bool end = false; | 
|  | chunk.append(&c, 1); | 
|  | while (!end && i < I) { | 
|  | ++i; | 
|  | c = s[i]; | 
|  | chunk.append(&c, 1); | 
|  | if (is_possible_normal_placeholder_end(c)) { | 
|  | end = true; | 
|  | } else if (c == 't') { | 
|  | ++i; | 
|  | c = s[i]; | 
|  | chunk.append(&c, 1); | 
|  | end = true; | 
|  | } | 
|  | } | 
|  | // Treat chunk as a placeholder unless it ends with %. | 
|  | result += ((c == '%') ? chunk : placeholder(chunk)); | 
|  | } else if (c == '<' || c == '&') { | 
|  | // html syntax, no need to pseudolocalize | 
|  | bool tag_closed = false; | 
|  | while (!tag_closed && i < I) { | 
|  | if (c == '&') { | 
|  | String16 escape_text; | 
|  | escape_text.append(&c, 1); | 
|  | bool end = false; | 
|  | size_t htmlCodePos = i; | 
|  | while (!end && htmlCodePos < I) { | 
|  | ++htmlCodePos; | 
|  | c = s[htmlCodePos]; | 
|  | escape_text.append(&c, 1); | 
|  | // Valid html code | 
|  | if (c == ';') { | 
|  | end = true; | 
|  | i = htmlCodePos; | 
|  | } | 
|  | // Wrong html code | 
|  | else if (!((c == '#' || | 
|  | (c >= 'a' && c <= 'z') || | 
|  | (c >= 'A' && c <= 'Z') || | 
|  | (c >= '0' && c <= '9')))) { | 
|  | end = true; | 
|  | } | 
|  | } | 
|  | result += escape_text; | 
|  | if (escape_text != String16("<")) { | 
|  | tag_closed = true; | 
|  | } | 
|  | continue; | 
|  | } | 
|  | if (c == '>') { | 
|  | tag_closed = true; | 
|  | result.append(&c, 1); | 
|  | continue; | 
|  | } | 
|  | result.append(&c, 1); | 
|  | i++; | 
|  | c = s[i]; | 
|  | } | 
|  | } else { | 
|  | // This is a pure text that should be pseudolocalized | 
|  | const char* p = pseudolocalize_char(c); | 
|  | if (p != NULL) { | 
|  | result += String16(p); | 
|  | } else { | 
|  | bool space = is_space(c); | 
|  | if (lastspace && !space) { | 
|  | mWordCount++; | 
|  | } | 
|  | lastspace = space; | 
|  | result.append(&c, 1); | 
|  | } | 
|  | // Count only pseudolocalizable chars and delimiters | 
|  | mLength++; | 
|  | } | 
|  | } | 
|  | return result; | 
|  | } | 
|  | String16 PseudoMethodAccent::placeholder(const String16& source) { | 
|  | // Surround a placeholder with brackets | 
|  | return k_placeholder_open + source + k_placeholder_close; | 
|  | } | 
|  |  | 
|  | String16 PseudoMethodBidi::text(const String16& source) | 
|  | { | 
|  | const char16_t* s = source.string(); | 
|  | String16 result; | 
|  | bool lastspace = true; | 
|  | bool space = true; | 
|  | bool escape = false; | 
|  | const char16_t ESCAPE_CHAR = '\\'; | 
|  | for (size_t i=0; i<source.size(); i++) { | 
|  | char16_t c = s[i]; | 
|  | if (!escape && c == ESCAPE_CHAR) { | 
|  | escape = true; | 
|  | continue; | 
|  | } | 
|  | space = (!escape && is_space(c)) || (escape && (c == 'n' || c == 't')); | 
|  | if (lastspace && !space) { | 
|  | // Word start | 
|  | result += k_rlm + k_rlo; | 
|  | } else if (!lastspace && space) { | 
|  | // Word end | 
|  | result += k_pdf + k_rlm; | 
|  | } | 
|  | lastspace = space; | 
|  | if (escape) { | 
|  | result.append(&ESCAPE_CHAR, 1); | 
|  | escape=false; | 
|  | } | 
|  | result.append(&c, 1); | 
|  | } | 
|  | if (!lastspace) { | 
|  | // End of last word | 
|  | result += k_pdf + k_rlm; | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | String16 PseudoMethodBidi::placeholder(const String16& source) { | 
|  | // Surround a placeholder with directionality change sequence | 
|  | return k_rlm + k_rlo + source + k_pdf + k_rlm; | 
|  | } | 
|  |  |