tools/aapt/pseudolocalize.cpp - platform/frameworks/base - Git at Google

 #include "pseudolocalize.h"

 using namespace std;

 // String basis to generate expansion
 // pseudo_generate_expansion() repeats and truncates this text to pad
 // pseudolocalized strings.
 // NOTE(review): "fiveteen" and the absence of "eighteen" look like typos, but
 // this is runtime text that ends up in generated output, so it is left as is.
 static const String16 k_expansion_string = String16("one two three "
     "four five six seven eight nine ten eleven twelve thirteen "
     "fourteen fiveteen sixteen seventeen nineteen twenty");

 // Special unicode characters to override directionality of the words
 // The byte sequences are the UTF-8 encodings of U+200F (right-to-left mark),
 // U+202E (right-to-left override) and U+202C (pop directional formatting).
 // Presumably String16(const char*) decodes UTF-8 -- confirm against String16.
 static const String16 k_rlm = String16("\xe2\x80\x8f");
 static const String16 k_rlo = String16("\xE2\x80\xae");
 static const String16 k_pdf = String16("\xE2\x80\xac");

 // Placeholder marks
 // UTF-8 for U+00BB and U+00AB; PseudoMethodAccent::placeholder() puts them
 // before and after a placeholder respectively.
 static const String16 k_placeholder_open = String16("\xc2\xbb");
 static const String16 k_placeholder_close = String16("\xc2\xab");

 // Delimiters that Pseudolocalizer::text() uses to track argument nesting.
 static const char16_t k_arg_start = '{';
 static const char16_t k_arg_end = '}';

 Pseudolocalizer::Pseudolocalizer(PseudolocalizationMethod m)
     : mImpl(nullptr), mLastDepth(0) {
   setMethod(m);
 }

 /**
  * Replaces the current implementation with the one matching |m|.
  * Any value other than PSEUDO_ACCENTED or PSEUDO_BIDI selects
  * PseudoMethodNone.
  */
 void Pseudolocalizer::setMethod(PseudolocalizationMethod m) {
   // Deleting a null pointer is a no-op, so no guard is required.
   delete mImpl;
   switch (m) {
     case PSEUDO_ACCENTED:
       mImpl = new PseudoMethodAccent();
       break;
     case PSEUDO_BIDI:
       mImpl = new PseudoMethodBidi();
       break;
     default:
       mImpl = new PseudoMethodNone();
       break;
   }
 }

 /**
  * Pseudolocalizes |text|, leaving '{'...'}' arguments untouched.
  *
  * The string is split wherever the brace nesting depth changes. Chunks seen
  * at an even depth are translatable text and go through mImpl->text();
  * chunks at an odd depth are arguments, and are passed to
  * mImpl->placeholder() when they start with '{' and end with '}'.
  * An apostrophe quotes the character that follows it, so neither of the two
  * affects the depth.
  *
  * The depth is kept in mLastDepth, so it carries over to the next call.
  *
  * @param text the source string.
  * @return the pseudolocalized string.
  */
 String16 Pseudolocalizer::text(const String16& text) {
   String16 out;
   size_t depth = mLastDepth;
   size_t lastpos, pos;
   const size_t length = text.size();
   const char16_t* str = text.string();
   bool escaped = false;
   for (lastpos = pos = 0; pos < length; pos++) {
     const char16_t c = str[pos];
     const bool atEnd = (pos + 1 == length);

     // True when c is an apostrophe or the character quoted by one.
     bool quoted = false;
     if (escaped) {
       escaped = false;
       quoted = true;
     } else if (c == '\'') {
       escaped = true;
       quoted = true;
     }

     if (quoted) {
       // Quoted characters never change the depth. They must still reach the
       // flush below when they are the last character, otherwise the pending
       // tail of the string would be dropped.
       if (!atEnd) {
         continue;
       }
     } else if (c == k_arg_start) {
       depth++;
     } else if (c == k_arg_end && depth) {
       depth--;
     }

     if (mLastDepth != depth || atEnd) {
       const bool pseudo = ((mLastDepth % 2) == 0);
       // Text chunks stop before the brace that changed the depth; argument
       // chunks and the final chunk include the current character.
       size_t nextpos = pos;
       if (!pseudo || depth == mLastDepth) {
         nextpos++;
       }
       const size_t size = nextpos - lastpos;
       if (size) {
         String16 chunk = String16(text, size, lastpos);
         if (pseudo) {
           chunk = mImpl->text(chunk);
         } else if (!quoted && str[lastpos] == k_arg_start &&
                    str[nextpos - 1] == k_arg_end) {
           // A quoted '}' does not terminate the argument, hence !quoted.
           chunk = mImpl->placeholder(chunk);
         }
         out.append(chunk);
       }
       if (pseudo && depth < mLastDepth) { // End of message
         out.append(mImpl->end());
       } else if (!pseudo && depth > mLastDepth) { // Start of message
         out.append(mImpl->start());
       }
       lastpos = nextpos;
       mLastDepth = depth;
     }
   }
   return out;
 }

 /**
  * Returns the replacement for |c| as a NUL-terminated byte string (the
  * multi-byte entries are UTF-8), or nullptr when |c| has no replacement.
  *
  * 'q', 'v' and 'Q' map to plain ASCII letters of the opposite case rather
  * than to accented characters.
  */
 static const char*
 pseudolocalize_char(const char16_t c)
 {
     switch (c) {
         case 'a':   return "\xc3\xa5";
         case 'b':   return "\xc9\x93";
         case 'c':   return "\xc3\xa7";
         case 'd':   return "\xc3\xb0";
         case 'e':   return "\xc3\xa9";
         case 'f':   return "\xc6\x92";
         case 'g':   return "\xc4\x9d";
         case 'h':   return "\xc4\xa5";
         case 'i':   return "\xc3\xae";
         case 'j':   return "\xc4\xb5";
         case 'k':   return "\xc4\xb7";
         case 'l':   return "\xc4\xbc";
         case 'm':   return "\xe1\xb8\xbf";
         case 'n':   return "\xc3\xb1";
         case 'o':   return "\xc3\xb6";
         case 'p':   return "\xc3\xbe";
         case 'q':   return "\x51";
         case 'r':   return "\xc5\x95";
         case 's':   return "\xc5\xa1";
         case 't':   return "\xc5\xa3";
         case 'u':   return "\xc3\xbb";
         case 'v':   return "\x56";
         case 'w':   return "\xc5\xb5";
         case 'x':   return "\xd1\x85";
         case 'y':   return "\xc3\xbd";
         case 'z':   return "\xc5\xbe";
         case 'A':   return "\xc3\x85";
         case 'B':   return "\xce\xb2";
         case 'C':   return "\xc3\x87";
         case 'D':   return "\xc3\x90";
         case 'E':   return "\xc3\x89";
         // 'F' was the only ASCII letter without an entry; U+0191 pairs with
         // the U+0192 used for 'f'.
         case 'F':   return "\xc6\x91";
         case 'G':   return "\xc4\x9c";
         case 'H':   return "\xc4\xa4";
         case 'I':   return "\xc3\x8e";
         case 'J':   return "\xc4\xb4";
         case 'K':   return "\xc4\xb6";
         case 'L':   return "\xc4\xbb";
         case 'M':   return "\xe1\xb8\xbe";
         case 'N':   return "\xc3\x91";
         case 'O':   return "\xc3\x96";
         case 'P':   return "\xc3\x9e";
         case 'Q':   return "\x71";
         case 'R':   return "\xc5\x94";
         case 'S':   return "\xc5\xa0";
         case 'T':   return "\xc5\xa2";
         case 'U':   return "\xc3\x9b";
         case 'V':   return "\xce\xbd";
         case 'W':   return "\xc5\xb4";
         case 'X':   return "\xc3\x97";
         case 'Y':   return "\xc3\x9d";
         case 'Z':   return "\xc5\xbd";
         case '!':   return "\xc2\xa1";
         case '?':   return "\xc2\xbf";
         case '$':   return "\xe2\x82\xac";
         default:    return nullptr;
     }
 }

 /**
  * Returns true when |c| is one of the characters that
  * PseudoMethodAccent::text() accepts as the end of a '%' placeholder.
  * 't' is deliberately absent: the caller handles it separately because it is
  * followed by one more character.
  */
 static bool is_possible_normal_placeholder_end(const char16_t c) {
     switch (c) {
         case 's': case 'S':
         case 'c': case 'C':
         case 'd':
         case 'o':
         case 'x': case 'X':
         case 'f':
         case 'e': case 'E':
         case 'g': case 'G':
         case 'a': case 'A':
         case 'b': case 'B':
         case 'h': case 'H':
         case '%':
         case 'n':
             return true;
         default:
             return false;
     }
 }

 /**
  * Builds filler text of roughly |length| characters from
  * k_expansion_string.
  *
  * When the basis string is long enough it is cut at |length| and then
  * extended up to (not including) the next space found after index |length|.
  * Otherwise the whole basis string is used, followed by a space and a
  * recursively generated remainder.
  */
 static String16 pseudo_generate_expansion(const unsigned int length) {
     String16 expansion = k_expansion_string;
     if (expansion.size() >= length) {
         const char16_t* chars = expansion.string();
         // Number of leading characters to keep; grows until a space is hit.
         unsigned int keep = length;
         for (unsigned int idx = length + 1; idx < expansion.size(); ++idx) {
             ++keep;
             if (chars[idx] == ' ') {
                 break;
             }
         }
         return String16(expansion, keep);
     }
     expansion += String16(" ");
     // The size now includes the separator that was just appended.
     expansion += pseudo_generate_expansion(length - expansion.size());
     return expansion;
 }

 /** Returns true for a space, a tab or a line feed. */
 static bool is_space(const char16_t c) {
   switch (c) {
     case ' ':
     case '\t':
     case '\n':
       return true;
     default:
       return false;
   }
 }

 /**
  * Opens a message: resets the length and word counters and increases the
  * nesting depth. Returns "[" for the outermost message and an empty string
  * for nested ones.
  */
 String16 PseudoMethodAccent::start() {
   const bool outermost = (mDepth == 0);
   mLength = 0;
   mWordCount = 0;
   mDepth++;
   if (outermost) {
     return String16(String8("["));
   }
   return String16();
 }

 /**
  * Closes a message. If any text was counted, returns a space followed by
  * expansion text sized from mLength (the full length when more than three
  * words were counted, half of it otherwise). Appends "]" when the outermost
  * message is closed. Resets the counters and decreases the nesting depth.
  */
 String16 PseudoMethodAccent::end() {
   String16 suffix;
   if (mLength != 0) {
     suffix.append(String16(String8(" ")));
     if (mWordCount > 3) {
       suffix.append(pseudo_generate_expansion(mLength));
     } else {
       suffix.append(pseudo_generate_expansion(mLength / 2));
     }
   }
   mLength = 0;
   mWordCount = 0;
   mDepth--;
   if (mDepth == 0) {
     suffix.append(String16(String8("]")));
   }
   return suffix;
 }

 /**
  * Converts characters so they look like they've been localized.
  *
  * Four kinds of syntax are copied through without being accented:
  * backslash escapes, '%' placeholders (wrapped by placeholder() unless the
  * chunk ends in '%'), '<...>' tags and '&...;' entities. An "&lt;" entity
  * opens an escaped tag that runs until the next entity or '>'. Every other
  * character is replaced by the result of pseudolocalize_char() when there
  * is one.
  *
  * Side effects: mLength is incremented for every plain-text character;
  * mWordCount is incremented for word starts, but only among characters that
  * have no accented replacement. end() uses both to size the expansion.
  *
  * Note: This leaves escape sequences untouched so they can later be
  * processed by ResTable::collectString in the normal way.
  */
 String16 PseudoMethodAccent::text(const String16& source)
 {
     const char16_t* s = source.string();
     String16 result;
     const size_t I = source.size();
     bool lastspace = true;
     // Characters accepted between '&' and ';' in an entity.
     const auto is_entity_char = [](const char16_t ch) {
         return ch == '#' ||
                (ch >= 'a' && ch <= 'z') ||
                (ch >= 'A' && ch <= 'Z') ||
                (ch >= '0' && ch <= '9');
     };
     for (size_t i = 0; i < I; i++) {
         char16_t c = s[i];
         if (c == '\\') {
             // Escape syntax, no need to pseudolocalize
             if (i + 1 < I) {
                 result += String16("\\");
                 i++;
                 c = s[i];
                 if (c == 'u') {
                     // "uXXXX" takes up 5 chars. Clamp to what is left so
                     // that a truncated escape at the end of the string
                     // does not read past the buffer.
                     const size_t remaining = I - i;
                     const size_t count = (remaining < 5) ? remaining : 5;
                     result += String16(s + i, count);
                     i += count - 1;
                 } else {
                     // Every other escaped character is copied as is.
                     result.append(&c, 1);
                 }
             } else {
                 // Lone backslash at the end of the string.
                 result.append(&c, 1);
             }
         } else if (c == '%') {
             // Placeholder syntax, no need to pseudolocalize
             String16 chunk;
             bool end = false;
             chunk.append(&c, 1);
             // Only advance while a character is actually available, so an
             // unterminated placeholder never pulls in the terminator.
             while (!end && i + 1 < I) {
                 ++i;
                 c = s[i];
                 chunk.append(&c, 1);
                 if (is_possible_normal_placeholder_end(c)) {
                     end = true;
                 } else if (c == 't') {
                     // 't' is followed by one more conversion character.
                     if (i + 1 < I) {
                         ++i;
                         c = s[i];
                         chunk.append(&c, 1);
                     }
                     end = true;
                 }
             }
             // Treat chunk as a placeholder unless it ends with %.
             result += ((c == '%') ? chunk : placeholder(chunk));
         } else if (c == '<' || c == '&') {
             // html syntax, no need to pseudolocalize
             bool tag_closed = false;
             while (!tag_closed && i < I) {
                 c = s[i];
                 if (c == '&') {
                     size_t entityEnd = i + 1;
                     while (entityEnd < I && is_entity_char(s[entityEnd])) {
                         ++entityEnd;
                     }
                     if (entityEnd < I && s[entityEnd] == ';') {
                         // Valid html code: copy "&...;" verbatim.
                         const String16 entity(s + i, entityEnd - i + 1);
                         result += entity;
                         i = entityEnd;
                         if (entity != String16("&lt;")) {
                             tag_closed = true;
                         } else {
                             // Escaped tag opened: keep copying after the
                             // ';' (which has already been emitted).
                             ++i;
                         }
                     } else {
                         // Wrong html code: emit only the '&' and let the
                         // main loop handle what follows, so nothing is
                         // emitted twice.
                         result.append(&c, 1);
                         tag_closed = true;
                     }
                     continue;
                 }
                 result.append(&c, 1);
                 if (c == '>') {
                     tag_closed = true;
                 } else {
                     ++i;
                 }
             }
         } else {
             // This is a pure text that should be pseudolocalized
             const char* p = pseudolocalize_char(c);
             if (p != nullptr) {
                 result += String16(p);
             } else {
                 // NOTE(review): word starts are only tracked on this branch,
                 // so words beginning with an accented letter are not
                 // counted. Left unchanged because it affects the size of
                 // the generated expansion.
                 bool space = is_space(c);
                 if (lastspace && !space) {
                   mWordCount++;
                 }
                 lastspace = space;
                 result.append(&c, 1);
             }
             // Count only pseudolocalizable chars and delimiters
             mLength++;
         }
     }
     return result;
 }
 /**
  * Returns |source| with k_placeholder_open in front of it and
  * k_placeholder_close behind it.
  */
 String16 PseudoMethodAccent::placeholder(const String16& source) {
   String16 wrapped(k_placeholder_open);
   wrapped.append(source);
   wrapped.append(k_placeholder_close);
   return wrapped;
 }

 /**
  * Wraps every word of |source| in directionality overrides: k_rlm + k_rlo
  * before the word and k_pdf + k_rlm after it.
  *
  * Words are separated by whitespace (see is_space()) or by the escape
  * sequences "\n" and "\t". Other backslash escapes are copied unchanged and
  * count as part of a word.
  */
 String16 PseudoMethodBidi::text(const String16& source)
 {
     const char16_t* s = source.string();
     const size_t length = source.size();
     String16 result;
     bool lastspace = true;
     bool escape = false;
     const char16_t ESCAPE_CHAR = '\\';
     for (size_t i = 0; i < length; i++) {
         const char16_t c = s[i];
         // A backslash only starts an escape when a character follows it.
         // A lone trailing backslash is treated as ordinary text so that it
         // is kept, as PseudoMethodAccent::text() does.
         if (!escape && c == ESCAPE_CHAR && i + 1 < length) {
           escape = true;
           continue;
         }
         const bool space = escape ? (c == 'n' || c == 't') : is_space(c);
         if (lastspace && !space) {
           // Word start
           result += k_rlm + k_rlo;
         } else if (!lastspace && space) {
           // Word end
           result += k_pdf + k_rlm;
         }
         lastspace = space;
         if (escape) {
           // Re-emit the backslash that was held back.
           result.append(&ESCAPE_CHAR, 1);
           escape = false;
         }
         result.append(&c, 1);
     }
     if (!lastspace) {
       // End of last word
       result += k_pdf + k_rlm;
     }
     return result;
 }

 /**
  * Returns |source| surrounded by the directionality change sequence:
  * k_rlm + k_rlo in front, k_pdf + k_rlm behind.
  */
 String16 PseudoMethodBidi::placeholder(const String16& source) {
   String16 wrapped(k_rlm);
   wrapped.append(k_rlo);
   wrapped.append(source);
   wrapped.append(k_pdf);
   wrapped.append(k_rlm);
   return wrapped;
 }
	#include "pseudolocalize.h"

	using namespace std;

	// String basis to generate expansion
	// pseudo_generate_expansion() repeats and truncates this text to pad
	// pseudolocalized strings.
	// NOTE(review): "fiveteen" and the absence of "eighteen" look like typos, but
	// this is runtime text that ends up in generated output, so it is left as is.
	static const String16 k_expansion_string = String16("one two three "
	"four five six seven eight nine ten eleven twelve thirteen "
	"fourteen fiveteen sixteen seventeen nineteen twenty");

	// Special unicode characters to override directionality of the words
	// The byte sequences are the UTF-8 encodings of U+200F (right-to-left mark),
	// U+202E (right-to-left override) and U+202C (pop directional formatting).
	// Presumably String16(const char*) decodes UTF-8 -- confirm against String16.
	static const String16 k_rlm = String16("\xe2\x80\x8f");
	static const String16 k_rlo = String16("\xE2\x80\xae");
	static const String16 k_pdf = String16("\xE2\x80\xac");

	// Placeholder marks
	// UTF-8 for U+00BB and U+00AB; PseudoMethodAccent::placeholder() puts them
	// before and after a placeholder respectively.
	static const String16 k_placeholder_open = String16("\xc2\xbb");
	static const String16 k_placeholder_close = String16("\xc2\xab");

	// Delimiters that Pseudolocalizer::text() uses to track argument nesting.
	static const char16_t k_arg_start = '{';
	static const char16_t k_arg_end = '}';

	Pseudolocalizer::Pseudolocalizer(PseudolocalizationMethod m)
	: mImpl(nullptr), mLastDepth(0) {
	setMethod(m);
	}

	/**
	 * Replaces the current implementation with the one matching |m|.
	 * Any value other than PSEUDO_ACCENTED or PSEUDO_BIDI selects
	 * PseudoMethodNone.
	 */
	void Pseudolocalizer::setMethod(PseudolocalizationMethod m) {
	  // Deleting a null pointer is a no-op, so no guard is required.
	  delete mImpl;
	  switch (m) {
	    case PSEUDO_ACCENTED:
	      mImpl = new PseudoMethodAccent();
	      break;
	    case PSEUDO_BIDI:
	      mImpl = new PseudoMethodBidi();
	      break;
	    default:
	      mImpl = new PseudoMethodNone();
	      break;
	  }
	}

	/**
	 * Pseudolocalizes |text|, leaving '{'...'}' arguments untouched.
	 *
	 * The string is split wherever the brace nesting depth changes. Chunks seen
	 * at an even depth are translatable text and go through mImpl->text();
	 * chunks at an odd depth are arguments, and are passed to
	 * mImpl->placeholder() when they start with '{' and end with '}'.
	 * An apostrophe quotes the character that follows it, so neither of the two
	 * affects the depth.
	 *
	 * The depth is kept in mLastDepth, so it carries over to the next call.
	 *
	 * @param text the source string.
	 * @return the pseudolocalized string.
	 */
	String16 Pseudolocalizer::text(const String16& text) {
	  String16 out;
	  size_t depth = mLastDepth;
	  size_t lastpos, pos;
	  const size_t length = text.size();
	  const char16_t* str = text.string();
	  bool escaped = false;
	  for (lastpos = pos = 0; pos < length; pos++) {
	    const char16_t c = str[pos];
	    const bool atEnd = (pos + 1 == length);

	    // True when c is an apostrophe or the character quoted by one.
	    bool quoted = false;
	    if (escaped) {
	      escaped = false;
	      quoted = true;
	    } else if (c == '\'') {
	      escaped = true;
	      quoted = true;
	    }

	    if (quoted) {
	      // Quoted characters never change the depth. They must still reach the
	      // flush below when they are the last character, otherwise the pending
	      // tail of the string would be dropped.
	      if (!atEnd) {
	        continue;
	      }
	    } else if (c == k_arg_start) {
	      depth++;
	    } else if (c == k_arg_end && depth) {
	      depth--;
	    }

	    if (mLastDepth != depth || atEnd) {
	      const bool pseudo = ((mLastDepth % 2) == 0);
	      // Text chunks stop before the brace that changed the depth; argument
	      // chunks and the final chunk include the current character.
	      size_t nextpos = pos;
	      if (!pseudo || depth == mLastDepth) {
	        nextpos++;
	      }
	      const size_t size = nextpos - lastpos;
	      if (size) {
	        String16 chunk = String16(text, size, lastpos);
	        if (pseudo) {
	          chunk = mImpl->text(chunk);
	        } else if (!quoted && str[lastpos] == k_arg_start &&
	                   str[nextpos - 1] == k_arg_end) {
	          // A quoted '}' does not terminate the argument, hence !quoted.
	          chunk = mImpl->placeholder(chunk);
	        }
	        out.append(chunk);
	      }
	      if (pseudo && depth < mLastDepth) { // End of message
	        out.append(mImpl->end());
	      } else if (!pseudo && depth > mLastDepth) { // Start of message
	        out.append(mImpl->start());
	      }
	      lastpos = nextpos;
	      mLastDepth = depth;
	    }
	  }
	  return out;
	}

	/**
	 * Returns the replacement for |c| as a NUL-terminated byte string (the
	 * multi-byte entries are UTF-8), or nullptr when |c| has no replacement.
	 *
	 * 'q', 'v' and 'Q' map to plain ASCII letters of the opposite case rather
	 * than to accented characters.
	 */
	static const char*
	pseudolocalize_char(const char16_t c)
	{
	    switch (c) {
	        case 'a':   return "\xc3\xa5";
	        case 'b':   return "\xc9\x93";
	        case 'c':   return "\xc3\xa7";
	        case 'd':   return "\xc3\xb0";
	        case 'e':   return "\xc3\xa9";
	        case 'f':   return "\xc6\x92";
	        case 'g':   return "\xc4\x9d";
	        case 'h':   return "\xc4\xa5";
	        case 'i':   return "\xc3\xae";
	        case 'j':   return "\xc4\xb5";
	        case 'k':   return "\xc4\xb7";
	        case 'l':   return "\xc4\xbc";
	        case 'm':   return "\xe1\xb8\xbf";
	        case 'n':   return "\xc3\xb1";
	        case 'o':   return "\xc3\xb6";
	        case 'p':   return "\xc3\xbe";
	        case 'q':   return "\x51";
	        case 'r':   return "\xc5\x95";
	        case 's':   return "\xc5\xa1";
	        case 't':   return "\xc5\xa3";
	        case 'u':   return "\xc3\xbb";
	        case 'v':   return "\x56";
	        case 'w':   return "\xc5\xb5";
	        case 'x':   return "\xd1\x85";
	        case 'y':   return "\xc3\xbd";
	        case 'z':   return "\xc5\xbe";
	        case 'A':   return "\xc3\x85";
	        case 'B':   return "\xce\xb2";
	        case 'C':   return "\xc3\x87";
	        case 'D':   return "\xc3\x90";
	        case 'E':   return "\xc3\x89";
	        // 'F' was the only ASCII letter without an entry; U+0191 pairs with
	        // the U+0192 used for 'f'.
	        case 'F':   return "\xc6\x91";
	        case 'G':   return "\xc4\x9c";
	        case 'H':   return "\xc4\xa4";
	        case 'I':   return "\xc3\x8e";
	        case 'J':   return "\xc4\xb4";
	        case 'K':   return "\xc4\xb6";
	        case 'L':   return "\xc4\xbb";
	        case 'M':   return "\xe1\xb8\xbe";
	        case 'N':   return "\xc3\x91";
	        case 'O':   return "\xc3\x96";
	        case 'P':   return "\xc3\x9e";
	        case 'Q':   return "\x71";
	        case 'R':   return "\xc5\x94";
	        case 'S':   return "\xc5\xa0";
	        case 'T':   return "\xc5\xa2";
	        case 'U':   return "\xc3\x9b";
	        case 'V':   return "\xce\xbd";
	        case 'W':   return "\xc5\xb4";
	        case 'X':   return "\xc3\x97";
	        case 'Y':   return "\xc3\x9d";
	        case 'Z':   return "\xc5\xbd";
	        case '!':   return "\xc2\xa1";
	        case '?':   return "\xc2\xbf";
	        case '$':   return "\xe2\x82\xac";
	        default:    return nullptr;
	    }
	}

	/**
	 * Returns true when |c| is one of the characters that
	 * PseudoMethodAccent::text() accepts as the end of a '%' placeholder.
	 * 't' is deliberately absent: the caller handles it separately because it is
	 * followed by one more character.
	 */
	static bool is_possible_normal_placeholder_end(const char16_t c) {
	    switch (c) {
	        case 's': case 'S':
	        case 'c': case 'C':
	        case 'd':
	        case 'o':
	        case 'x': case 'X':
	        case 'f':
	        case 'e': case 'E':
	        case 'g': case 'G':
	        case 'a': case 'A':
	        case 'b': case 'B':
	        case 'h': case 'H':
	        case '%':
	        case 'n':
	            return true;
	        default:
	            return false;
	    }
	}

	/**
	 * Builds filler text of roughly |length| characters from
	 * k_expansion_string.
	 *
	 * When the basis string is long enough it is cut at |length| and then
	 * extended up to (not including) the next space found after index |length|.
	 * Otherwise the whole basis string is used, followed by a space and a
	 * recursively generated remainder.
	 */
	static String16 pseudo_generate_expansion(const unsigned int length) {
	    String16 expansion = k_expansion_string;
	    if (expansion.size() >= length) {
	        const char16_t* chars = expansion.string();
	        // Number of leading characters to keep; grows until a space is hit.
	        unsigned int keep = length;
	        for (unsigned int idx = length + 1; idx < expansion.size(); ++idx) {
	            ++keep;
	            if (chars[idx] == ' ') {
	                break;
	            }
	        }
	        return String16(expansion, keep);
	    }
	    expansion += String16(" ");
	    // The size now includes the separator that was just appended.
	    expansion += pseudo_generate_expansion(length - expansion.size());
	    return expansion;
	}

	/** Returns true for a space, a tab or a line feed. */
	static bool is_space(const char16_t c) {
	  // The original line contained "\|\|" (an escaping artifact) where the
	  // logical-or operator belongs, which does not compile.
	  return (c == ' ' || c == '\t' || c == '\n');
	}

	/**
	 * Opens a message: resets the length and word counters and increases the
	 * nesting depth. Returns "[" for the outermost message and an empty string
	 * for nested ones.
	 */
	String16 PseudoMethodAccent::start() {
	  const bool outermost = (mDepth == 0);
	  mLength = 0;
	  mWordCount = 0;
	  mDepth++;
	  if (outermost) {
	    return String16(String8("["));
	  }
	  return String16();
	}

	/**
	 * Closes a message. If any text was counted, returns a space followed by
	 * expansion text sized from mLength (the full length when more than three
	 * words were counted, half of it otherwise). Appends "]" when the outermost
	 * message is closed. Resets the counters and decreases the nesting depth.
	 */
	String16 PseudoMethodAccent::end() {
	  String16 suffix;
	  if (mLength != 0) {
	    suffix.append(String16(String8(" ")));
	    if (mWordCount > 3) {
	      suffix.append(pseudo_generate_expansion(mLength));
	    } else {
	      suffix.append(pseudo_generate_expansion(mLength / 2));
	    }
	  }
	  mLength = 0;
	  mWordCount = 0;
	  mDepth--;
	  if (mDepth == 0) {
	    suffix.append(String16(String8("]")));
	  }
	  return suffix;
	}

	/**
	 * Converts characters so they look like they've been localized.
	 *
	 * Four kinds of syntax are copied through without being accented:
	 * backslash escapes, '%' placeholders (wrapped by placeholder() unless the
	 * chunk ends in '%'), '<...>' tags and '&...;' entities. An "&lt;" entity
	 * opens an escaped tag that runs until the next entity or '>'. Every other
	 * character is replaced by the result of pseudolocalize_char() when there
	 * is one.
	 *
	 * Side effects: mLength is incremented for every plain-text character;
	 * mWordCount is incremented for word starts, but only among characters that
	 * have no accented replacement. end() uses both to size the expansion.
	 *
	 * Note: This leaves escape sequences untouched so they can later be
	 * processed by ResTable::collectString in the normal way.
	 */
	String16 PseudoMethodAccent::text(const String16& source)
	{
	    const char16_t* s = source.string();
	    String16 result;
	    const size_t I = source.size();
	    bool lastspace = true;
	    // Characters accepted between '&' and ';' in an entity.
	    const auto is_entity_char = [](const char16_t ch) {
	        return ch == '#' ||
	               (ch >= 'a' && ch <= 'z') ||
	               (ch >= 'A' && ch <= 'Z') ||
	               (ch >= '0' && ch <= '9');
	    };
	    for (size_t i = 0; i < I; i++) {
	        char16_t c = s[i];
	        if (c == '\\') {
	            // Escape syntax, no need to pseudolocalize
	            if (i + 1 < I) {
	                result += String16("\\");
	                i++;
	                c = s[i];
	                if (c == 'u') {
	                    // "uXXXX" takes up 5 chars. Clamp to what is left so
	                    // that a truncated escape at the end of the string
	                    // does not read past the buffer.
	                    const size_t remaining = I - i;
	                    const size_t count = (remaining < 5) ? remaining : 5;
	                    result += String16(s + i, count);
	                    i += count - 1;
	                } else {
	                    // Every other escaped character is copied as is.
	                    result.append(&c, 1);
	                }
	            } else {
	                // Lone backslash at the end of the string.
	                result.append(&c, 1);
	            }
	        } else if (c == '%') {
	            // Placeholder syntax, no need to pseudolocalize
	            String16 chunk;
	            bool end = false;
	            chunk.append(&c, 1);
	            // Only advance while a character is actually available, so an
	            // unterminated placeholder never pulls in the terminator.
	            while (!end && i + 1 < I) {
	                ++i;
	                c = s[i];
	                chunk.append(&c, 1);
	                if (is_possible_normal_placeholder_end(c)) {
	                    end = true;
	                } else if (c == 't') {
	                    // 't' is followed by one more conversion character.
	                    if (i + 1 < I) {
	                        ++i;
	                        c = s[i];
	                        chunk.append(&c, 1);
	                    }
	                    end = true;
	                }
	            }
	            // Treat chunk as a placeholder unless it ends with %.
	            result += ((c == '%') ? chunk : placeholder(chunk));
	        } else if (c == '<' || c == '&') {
	            // html syntax, no need to pseudolocalize
	            bool tag_closed = false;
	            while (!tag_closed && i < I) {
	                c = s[i];
	                if (c == '&') {
	                    size_t entityEnd = i + 1;
	                    while (entityEnd < I && is_entity_char(s[entityEnd])) {
	                        ++entityEnd;
	                    }
	                    if (entityEnd < I && s[entityEnd] == ';') {
	                        // Valid html code: copy "&...;" verbatim.
	                        const String16 entity(s + i, entityEnd - i + 1);
	                        result += entity;
	                        i = entityEnd;
	                        // The literal here had been collapsed to "<" by
	                        // HTML unescaping; the entity text is "&lt;".
	                        if (entity != String16("&lt;")) {
	                            tag_closed = true;
	                        } else {
	                            // Escaped tag opened: keep copying after the
	                            // ';' (which has already been emitted).
	                            ++i;
	                        }
	                    } else {
	                        // Wrong html code: emit only the '&' and let the
	                        // main loop handle what follows, so nothing is
	                        // emitted twice.
	                        result.append(&c, 1);
	                        tag_closed = true;
	                    }
	                    continue;
	                }
	                result.append(&c, 1);
	                if (c == '>') {
	                    tag_closed = true;
	                } else {
	                    ++i;
	                }
	            }
	        } else {
	            // This is a pure text that should be pseudolocalized
	            const char* p = pseudolocalize_char(c);
	            if (p != nullptr) {
	                result += String16(p);
	            } else {
	                // NOTE(review): word starts are only tracked on this branch,
	                // so words beginning with an accented letter are not
	                // counted. Left unchanged because it affects the size of
	                // the generated expansion.
	                bool space = is_space(c);
	                if (lastspace && !space) {
	                  mWordCount++;
	                }
	                lastspace = space;
	                result.append(&c, 1);
	            }
	            // Count only pseudolocalizable chars and delimiters
	            mLength++;
	        }
	    }
	    return result;
	}
	/**
	 * Returns |source| with k_placeholder_open in front of it and
	 * k_placeholder_close behind it.
	 */
	String16 PseudoMethodAccent::placeholder(const String16& source) {
	  String16 wrapped(k_placeholder_open);
	  wrapped.append(source);
	  wrapped.append(k_placeholder_close);
	  return wrapped;
	}

	/**
	 * Wraps every word of |source| in directionality overrides: k_rlm + k_rlo
	 * before the word and k_pdf + k_rlm after it.
	 *
	 * Words are separated by whitespace (see is_space()) or by the escape
	 * sequences "\n" and "\t". Other backslash escapes are copied unchanged and
	 * count as part of a word.
	 */
	String16 PseudoMethodBidi::text(const String16& source)
	{
	    const char16_t* s = source.string();
	    const size_t length = source.size();
	    String16 result;
	    bool lastspace = true;
	    bool escape = false;
	    const char16_t ESCAPE_CHAR = '\\';
	    for (size_t i = 0; i < length; i++) {
	        const char16_t c = s[i];
	        // A backslash only starts an escape when a character follows it.
	        // A lone trailing backslash is treated as ordinary text so that it
	        // is kept, as PseudoMethodAccent::text() does.
	        if (!escape && c == ESCAPE_CHAR && i + 1 < length) {
	          escape = true;
	          continue;
	        }
	        const bool space = escape ? (c == 'n' || c == 't') : is_space(c);
	        if (lastspace && !space) {
	          // Word start
	          result += k_rlm + k_rlo;
	        } else if (!lastspace && space) {
	          // Word end
	          result += k_pdf + k_rlm;
	        }
	        lastspace = space;
	        if (escape) {
	          // Re-emit the backslash that was held back.
	          result.append(&ESCAPE_CHAR, 1);
	          escape = false;
	        }
	        result.append(&c, 1);
	    }
	    if (!lastspace) {
	      // End of last word
	      result += k_pdf + k_rlm;
	    }
	    return result;
	}

	/**
	 * Returns |source| surrounded by the directionality change sequence:
	 * k_rlm + k_rlo in front, k_pdf + k_rlm behind.
	 */
	String16 PseudoMethodBidi::placeholder(const String16& source) {
	  String16 wrapped(k_rlm);
	  wrapped.append(k_rlo);
	  wrapped.append(source);
	  wrapped.append(k_pdf);
	  wrapped.append(k_rlm);
	  return wrapped;
	}