Merge from Chromium at DEPS revision 278856 This commit was generated by merge_to_master.py. Change-Id: I283eef90c15d40ea8cd6290f12094fc152a2c45e

commit: 6ce3a9ad00160cd58574b6bca6d2220c4dbfc83e [log] [tgz]
author: Torne (Richard Coles) <torne@google.com> Wed Jun 25 10:31:36 2014 +0100
committer: Torne (Richard Coles) <torne@google.com> Wed Jun 25 10:31:36 2014 +0100
tree: bd4c283a39b6659d25c49870a045ff73b049eebc
parent: b8347ad8ead685b8afe0ff329ae047f17c7b817c [diff]
parent: f7ddeee545f03c948074c921c4648807d90227ae [diff]
diff --git a/cpp/include/libaddressinput/address_ui.h b/cpp/include/libaddressinput/address_ui.h
index 8939501..cc39f6a 100644
--- a/cpp/include/libaddressinput/address_ui.h
+++ b/cpp/include/libaddressinput/address_ui.h

@@ -29,7 +29,7 @@
 
 // Returns the UI components for the CLDR |region_code|. Uses the strings from
 // |localization|. The components can be in default or Latin order, depending on
-// the language of |localization|.
+// the BCP 47 |ui_language_tag|.
 //
 // Sets the |best_address_language_tag| to the BCP 47 language tag that should
 // be saved with this address. This language will be used to get drop-downs to
@@ -40,6 +40,7 @@
 std::vector<AddressUiComponent> BuildComponents(
     const std::string& region_code,
     const Localization& localization,
+    const std::string& ui_language_tag,
     std::string* best_address_language_tag);
 
 }  // namespace addressinput

diff --git a/cpp/include/libaddressinput/localization.h b/cpp/include/libaddressinput/localization.h
index 08b5964..acfdf7e 100644
--- a/cpp/include/libaddressinput/localization.h
+++ b/cpp/include/libaddressinput/localization.h

@@ -26,17 +26,17 @@
 
 struct AddressData;
 
-// The object to retrieve localized strings based on message IDs. Sample usage:
+// The object to retrieve localized strings based on message IDs. It returns
+// English by default. Sample usage:
 //    Localization localization;
-//    localization.SetLanguage("en");
 //    std::string best_language_tag;
-//    Process(BuildComponents("CA", localization, &best_language_tag));
+//    Process(BuildComponents("CA", localization, "en-US", &best_language_tag));
 //
 // Alternative usage:
 //    Localization localization;
-//    localization.SetGetter(&MyStringGetter, "fr");
+//    localization.SetGetter(&MyStringGetter);
 //    std::string best_language_tag;
-//    Process(BuildComponents("CA", localization, &best_language_tag));
+//    Process(BuildComponents("CA", localization, "fr-CA", &best_language_tag));
 class Localization {
  public:
   // Initializes with English messages by default.
@@ -63,17 +63,11 @@
                               bool enable_examples,
                               bool enable_links) const;
 
-  // Sets the language for the strings. The only supported language is "en"
-  // until we have translations.
-  void SetLanguage(const std::string& language_tag);
-
   // Sets the string getter that takes a message identifier and returns the
-  // corresponding localized string. The |language_tag| parameter is used only
-  // for information purposes here.
-  void SetGetter(std::string (*getter)(int), const std::string& language_tag);
-
-  // Returns the current language tag.
-  const std::string& GetLanguage() const { return language_tag_; }
+  // corresponding localized string. For example, in Chromium there is
+  // l10n_util::GetStringUTF8 which always returns strings in the current
+  // application locale.
+  void SetGetter(std::string (*getter)(int));
 
  private:
   // Returns the error message where the address field is a postal code. Helper
@@ -90,9 +84,6 @@
 
   // The string getter.
   std::string (*get_string_)(int);
-
-  // The current language tag.
-  std::string language_tag_;
 };
 
 }  // namespace addressinput

diff --git a/cpp/include/libaddressinput/preload_supplier.h b/cpp/include/libaddressinput/preload_supplier.h
index f7654ab..740b4d0 100644
--- a/cpp/include/libaddressinput/preload_supplier.h
+++ b/cpp/include/libaddressinput/preload_supplier.h

@@ -20,14 +20,15 @@
 #include <libaddressinput/util/basictypes.h>
 #include <libaddressinput/util/scoped_ptr.h>
 
-#include <map>
 #include <set>
 #include <string>
+#include <vector>
 
 namespace i18n {
 namespace addressinput {
 
 class Downloader;
+class IndexMap;
 class LookupKey;
 class Retriever;
 class Rule;
@@ -90,7 +91,8 @@
 
   const scoped_ptr<const Retriever> retriever_;
   std::set<std::string> pending_;
-  std::map<std::string, const Rule*> rule_cache_;
+  const scoped_ptr<IndexMap> rule_index_;
+  std::vector<const Rule*> rule_storage_;
 
   DISALLOW_COPY_AND_ASSIGN(PreloadSupplier);
 };

diff --git a/cpp/src/address_ui.cc b/cpp/src/address_ui.cc
index d515765..a099335 100644
--- a/cpp/src/address_ui.cc
+++ b/cpp/src/address_ui.cc

@@ -82,6 +82,7 @@
 std::vector<AddressUiComponent> BuildComponents(
     const std::string& region_code,
     const Localization& localization,
+    const std::string& ui_language_tag,
     std::string* best_address_language_tag) {
   assert(best_address_language_tag != NULL);
   std::vector<AddressUiComponent> result;
@@ -93,8 +94,8 @@
     return result;
   }
 
-  const Language& best_address_language = ChooseBestAddressLanguage(
-      rule, Language(localization.GetLanguage()));
+  const Language& best_address_language =
+      ChooseBestAddressLanguage(rule, Language(ui_language_tag));
   *best_address_language_tag = best_address_language.tag;
 
   const std::vector<FormatElement>& format =

diff --git a/cpp/src/localization.cc b/cpp/src/localization.cc
index 951f1a7..9bb32a5 100644
--- a/cpp/src/localization.cc
+++ b/cpp/src/localization.cc

@@ -25,6 +25,7 @@
 #include "grit.h"
 #include "region_data_constants.h"
 #include "rule.h"
+#include "util/string_split.h"
 #include "util/string_util.h"
 
 namespace {
@@ -42,28 +43,16 @@
 
 namespace {
 
-static const char kDefaultLanguage[] = "en";
-
-// For each language XX with translations:
-//    (1) Add a namespace XX here with an include of "XX_messages.cc".
-//    (2) Add a wrapper that converts the char pointer to std::string. (GRIT
-//        generated functions return char pointers.)
-//    (2) Use the XX::GetStdString in the SetLanguage() method below.
-namespace en {
-
 #include "en_messages.cc"
 
-std::string GetStdString(int message_id) {
+std::string GetEnglishString(int message_id) {
   const char* str = GetString(message_id);
   return str != NULL ? std::string(str) : std::string();
 }
 
-}  // namespace en
-
 }  // namespace
 
-Localization::Localization() : get_string_(&en::GetStdString),
-                               language_tag_(kDefaultLanguage) {}
+Localization::Localization() : get_string_(&GetEnglishString) {}
 
 Localization::~Localization() {}
 
@@ -83,7 +72,11 @@
     if (rule.ParseSerializedRule(
             RegionDataConstants::GetRegionData(address.region_code))) {
       if (enable_examples) {
-        postal_code_example = rule.GetPostalCodeExample();
+        std::vector<std::string> examples_list;
+        SplitString(rule.GetPostalCodeExample(), ',', &examples_list);
+        if (!examples_list.empty()) {
+          postal_code_example = examples_list.front();
+        }
       }
       if (enable_links) {
         post_service_url = rule.GetPostServiceUrl();
@@ -124,20 +117,9 @@
   }
 }
 
-void Localization::SetLanguage(const std::string& language_tag) {
-  if (language_tag == kDefaultLanguage) {
-    get_string_ = &en::GetStdString;
-  } else {
-    assert(false);
-  }
-  language_tag_ = language_tag;
-}
-
-void Localization::SetGetter(std::string (*getter)(int),
-                             const std::string& language_tag) {
+void Localization::SetGetter(std::string (*getter)(int)) {
   assert(getter != NULL);
   get_string_ = getter;
-  language_tag_ = language_tag;
 }
 
 std::string Localization::GetErrorMessageForPostalCode(

diff --git a/cpp/src/preload_supplier.cc b/cpp/src/preload_supplier.cc
index 5219e9a..31325bd 100644
--- a/cpp/src/preload_supplier.cc
+++ b/cpp/src/preload_supplier.cc

@@ -21,10 +21,13 @@
 #include <libaddressinput/util/basictypes.h>
 #include <libaddressinput/util/scoped_ptr.h>
 
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <functional>
 #include <map>
 #include <set>
+#include <stack>
 #include <string>
 #include <utility>
 #include <vector>
@@ -35,12 +38,36 @@
 #include "retriever.h"
 #include "rule.h"
 #include "util/json.h"
+#include "util/string_compare.h"
 
 namespace i18n {
 namespace addressinput {
 
 namespace {
 
+// STL predicate less<> that uses StringCompare to match strings that a human
+// reader would consider to be "the same". The default implementation just does
+// case insensitive string comparison, but StringCompare can be overridden with
+// more sophisticated implementations.
+class IndexLess : public std::binary_function<std::string, std::string, bool> {
+ public:
+  result_type operator()(const first_argument_type& a,
+                         const second_argument_type& b) const {
+    return kStringCompare.NaturalLess(a, b);
+  }
+
+ private:
+  static const StringCompare kStringCompare;
+};
+
+const StringCompare IndexLess::kStringCompare;
+
+}  // namespace
+
+class IndexMap : public std::map<std::string, const Rule*, IndexLess> {};
+
+namespace {
+
 class Helper {
  public:
   // Does not take ownership of its parameters.
@@ -49,14 +76,17 @@
          const PreloadSupplier::Callback& loaded,
          const Retriever& retriever,
          std::set<std::string>* pending,
-         std::map<std::string, const Rule*>* rule_cache)
+         IndexMap* rule_index,
+         std::vector<const Rule*>* rule_storage)
       : region_code_(region_code),
         loaded_(loaded),
         pending_(pending),
-        rule_cache_(rule_cache),
+        rule_index_(rule_index),
+        rule_storage_(rule_storage),
         retrieved_(BuildCallback(this, &Helper::OnRetrieved)) {
     assert(pending_ != NULL);
-    assert(rule_cache_ != NULL);
+    assert(rule_index_ != NULL);
+    assert(rule_storage_ != NULL);
     assert(retrieved_ != NULL);
     pending_->insert(key);
     retriever.Retrieve(key, *retrieved_);
@@ -75,6 +105,7 @@
     (void)status;  // Prevent unused variable if assert() is optimized away.
 
     Json json;
+    std::vector<const Rule*> sub_rules;
 
     if (!success) {
       goto callback;
@@ -112,13 +143,95 @@
       rule->ParseJsonRule(value);
       assert(id == rule->GetId());  // Sanity check.
 
-      std::pair<std::map<std::string, const Rule*>::iterator, bool> result =
-          rule_cache_->insert(std::make_pair(rule->GetId(), rule));
+      rule_storage_->push_back(rule);
+      if (depth > 0) {
+        sub_rules.push_back(rule);
+      }
+
+      // Add the ID of this Rule object to the rule index.
+      std::pair<IndexMap::iterator, bool> result =
+          rule_index_->insert(std::make_pair(id, rule));
       assert(result.second);
       (void)result;  // Prevent unused variable if assert() is optimized away.
+
       ++rule_count;
     }
 
+    /*
+     * Normally the address metadata server takes care of mapping from natural
+     * language names to metadata IDs (eg. "São Paulo" -> "SP") and from Latin
+     * script names to local script names (eg. "Tokushima" -> "徳島県").
+     *
+     * As the PreloadSupplier doesn't contact the metadata server upon each
+     * Supply() request, it instead has an internal lookup table (rule_index_)
+     * that contains such mappings.
+     *
+     * This lookup table is populated by iterating over all sub rules and for
+     * each of them construct ID strings using human readable names (eg. "São
+     * Paulo") and using Latin script names (eg. "Tokushima").
+     */
+    for (std::vector<const Rule*>::const_iterator
+         it = sub_rules.begin(); it != sub_rules.end(); ++it) {
+      std::stack<const Rule*> hierarchy;
+      hierarchy.push(*it);
+
+      // Push pointers to all parent Rule objects onto the hierarchy stack.
+      for (std::string parent_id((*it)->GetId());;) {
+        // Strip the last part of parent_id. Break if COUNTRY level is reached.
+        std::string::size_type pos = parent_id.rfind('/');
+        if (pos == sizeof "data/ZZ" - 1) {
+          break;
+        }
+        parent_id.resize(pos);
+
+        IndexMap::const_iterator jt = rule_index_->find(parent_id);
+        assert(jt != rule_index_->end());
+        hierarchy.push(jt->second);
+      }
+
+      std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1));
+      std::string latin_id(human_id);
+
+      // Append the names from all Rule objects on the hierarchy stack.
+      for (; !hierarchy.empty(); hierarchy.pop()) {
+        const Rule* rule = hierarchy.top();
+
+        human_id.push_back('/');
+        if (!rule->GetName().empty()) {
+          human_id.append(rule->GetName());
+        } else {
+          // If the "name" field is empty, the name is the last part of the ID.
+          const std::string& id = rule->GetId();
+          std::string::size_type pos = id.rfind('/');
+          assert(pos != std::string::npos);
+          human_id.append(id.substr(pos + 1));
+        }
+
+        if (!rule->GetLatinName().empty()) {
+          latin_id.push_back('/');
+          latin_id.append(rule->GetLatinName());
+        }
+      }
+
+      // If the ID has a language tag, copy it.
+      {
+        const std::string& id = (*it)->GetId();
+        std::string::size_type pos = id.rfind("--");
+        if (pos != std::string::npos) {
+          human_id.append(id, pos, id.size() - pos);
+        }
+      }
+
+      rule_index_->insert(std::make_pair(human_id, *it));
+
+      // Add the Latin script ID, if a Latin script name could be found for
+      // every part of the ID.
+      if (std::count(human_id.begin(), human_id.end(), '/') ==
+          std::count(latin_id.begin(), latin_id.end(), '/')) {
+        rule_index_->insert(std::make_pair(latin_id, *it));
+      }
+    }
+
   callback:
     loaded_(success, region_code_, rule_count);
     delete this;
@@ -127,7 +240,8 @@
   const std::string region_code_;
   const PreloadSupplier::Callback& loaded_;
   std::set<std::string>* const pending_;
-  std::map<std::string, const Rule*>* const rule_cache_;
+  IndexMap* const rule_index_;
+  std::vector<const Rule*>* const rule_storage_;
   const scoped_ptr<const Retriever::Callback> retrieved_;
 
   DISALLOW_COPY_AND_ASSIGN(Helper);
@@ -148,12 +262,13 @@
                                  Storage* storage)
     : retriever_(new Retriever(validation_data_url, downloader, storage)),
       pending_(),
-      rule_cache_() {}
+      rule_index_(new IndexMap),
+      rule_storage_() {}
 
 PreloadSupplier::~PreloadSupplier() {
-  for (std::map<std::string, const Rule*>::const_iterator
-       it = rule_cache_.begin(); it != rule_cache_.end(); ++it) {
-    delete it->second;
+  for (std::vector<const Rule*>::const_iterator
+       it = rule_storage_.begin(); it != rule_storage_.end(); ++it) {
+    delete *it;
   }
 }
 
@@ -192,7 +307,8 @@
       loaded,
       *retriever_,
       &pending_,
-      &rule_cache_);
+      rule_index_.get(),
+      &rule_storage_);
 }
 
 bool PreloadSupplier::IsLoaded(const std::string& region_code) const {
@@ -214,9 +330,8 @@
 
     for (size_t depth = 0; depth <= max_depth; ++depth) {
       const std::string& key = lookup_key.ToKeyString(depth);
-      std::map<std::string, const Rule*>::const_iterator it =
-          rule_cache_.find(key);
-      if (it == rule_cache_.end()) {
+      IndexMap::const_iterator it = rule_index_->find(key);
+      if (it == rule_index_->end()) {
         return depth > 0;  // No data on COUNTRY level is failure.
       }
       hierarchy->rule[depth] = it->second;
@@ -227,7 +342,7 @@
 }
 
 bool PreloadSupplier::IsLoadedKey(const std::string& key) const {
-  return rule_cache_.find(key) != rule_cache_.end();
+  return rule_index_->find(key) != rule_index_->end();
 }
 
 bool PreloadSupplier::IsPendingKey(const std::string& key) const {

diff --git a/cpp/src/region_data_constants.cc b/cpp/src/region_data_constants.cc
index f3d9c3c..cabee28 100644
--- a/cpp/src/region_data_constants.cc
+++ b/cpp/src/region_data_constants.cc

@@ -12,10 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-// The data in this file will be automatically generated. For now, the data
-// comes from:
-//
-// https://code.google.com/p/libaddressinput/source/browse/trunk/java/src/com/android/i18n/addressinput/RegionDataConstants.java?r=137
+// The data in this file is automatically generated.
 
 #include "region_data_constants.h"
 
@@ -1263,7 +1260,7 @@
       "\"languages\":\"en\""
       "}"));
   region_data.insert(std::make_pair("US", "{"
-      "\"fmt\":\"%N%n%O%n%A%n%C %S %Z\","
+      "\"fmt\":\"%N%n%O%n%A%n%C, %S %Z\","
       "\"require\":\"ACSZ\","
       "\"zip_name_type\":\"zip\","
       "\"state_name_type\":\"state\","

diff --git a/cpp/src/util/lru_cache_using_std.h b/cpp/src/util/lru_cache_using_std.h
new file mode 100644
index 0000000..25aced7
--- /dev/null
+++ b/cpp/src/util/lru_cache_using_std.h

@@ -0,0 +1,168 @@
+/******************************************************************************/
+/*  Copyright (c) 2010-2011, Tim Day <timday@timday.com>                      */
+/*                                                                            */
+/*  Permission to use, copy, modify, and/or distribute this software for any  */
+/*  purpose with or without fee is hereby granted, provided that the above    */
+/*  copyright notice and this permission notice appear in all copies.         */
+/*                                                                            */
+/*  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES  */
+/*  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF          */
+/*  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR   */
+/*  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
+/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN     */
+/*  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF   */
+/*  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.            */
+/******************************************************************************/
+
+// The original source code is from:
+// https://bitbucket.org/timday/lru_cache/src/497822a492a8/include/lru_cache_using_std.h
+
+#ifndef I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_
+#define I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_
+
+#include <cassert>
+#include <list>
+#include <map>
+
+// Class providing fixed-size (by number of records)
+// LRU-replacement cache of a function with signature
+// V f(K).
+// The default comparator/hash/allocator will be used.
+template <
+  typename K,
+  typename V
+  > class lru_cache_using_std
+{
+public:
+
+  typedef K key_type;
+  typedef V value_type;
+
+  // Key access history, most recent at back
+  typedef std::list<key_type> key_tracker_type;
+
+  // Key to value and key history iterator
+  typedef std::map<
+    key_type,
+    std::pair<
+      value_type,
+      typename key_tracker_type::iterator
+      >
+  > key_to_value_type;
+
+  // Constructor specifies the cached function and
+  // the maximum number of records to be stored
+  lru_cache_using_std(
+    value_type (*f)(const key_type&),
+    size_t c
+  )
+    :_fn(f)
+    ,_capacity(c)
+  {
+    assert(_capacity!=0);
+  }
+
+  // Obtain value of the cached function for k
+  value_type operator()(const key_type& k) {
+
+    // Attempt to find existing record
+    const typename key_to_value_type::iterator it
+      =_key_to_value.find(k);
+
+    if (it==_key_to_value.end()) {
+
+      // We don't have it:
+
+      // Evaluate function and create new record
+      const value_type v=_fn(k);
+      insert(k,v);
+
+      // Return the freshly computed value
+      return v;
+
+    } else {
+
+      // We do have it:
+
+      // Update access record by moving
+      // accessed key to back of list
+      _key_tracker.splice(
+	_key_tracker.end(),
+	_key_tracker,
+	(*it).second.second
+      );
+
+      // Return the retrieved value
+      return (*it).second.first;
+    }
+  }
+
+  // Obtain the cached keys, most recently used element
+  // at head, least recently used at tail.
+  // This method is provided purely to support testing.
+  template <typename IT> void get_keys(IT dst) const {
+    typename key_tracker_type::const_reverse_iterator src
+	=_key_tracker.rbegin();
+    while (src!=_key_tracker.rend()) {
+      *dst++ = *src++;
+    }
+  }
+
+private:
+
+  // Record a fresh key-value pair in the cache
+  void insert(const key_type& k,const value_type& v) {
+
+    // Method is only called on cache misses
+    assert(_key_to_value.find(k)==_key_to_value.end());
+
+    // Make space if necessary
+    if (_key_to_value.size()==_capacity)
+      evict();
+
+    // Record k as most-recently-used key
+    typename key_tracker_type::iterator it
+      =_key_tracker.insert(_key_tracker.end(),k);
+
+    // Create the key-value entry,
+    // linked to the usage record.
+    _key_to_value.insert(
+      std::make_pair(
+	k,
+	std::make_pair(v,it)
+      )
+    );
+    // No need to check return,
+    // given previous assert.
+  }
+
+  // Purge the least-recently-used element in the cache
+  void evict() {
+
+    // Assert method is never called when cache is empty
+    assert(!_key_tracker.empty());
+
+    // Identify least recently used key
+    const typename key_to_value_type::iterator it
+      =_key_to_value.find(_key_tracker.front());
+    assert(it!=_key_to_value.end());
+
+    // Erase both elements to completely purge record
+    _key_to_value.erase(it);
+    _key_tracker.pop_front();
+  }
+
+  // The function to be cached
+  value_type (*_fn)(const key_type&);
+
+  // Maximum number of key-value pairs to be retained
+  const size_t _capacity;
+
+  // Key access history
+  key_tracker_type _key_tracker;
+
+  // Key-to-value lookup
+  key_to_value_type _key_to_value;
+};
+
+#endif  // I18N_ADDRESSINPUT_UTIL_LRU_CACHE_USING_STD_H_

diff --git a/cpp/src/util/string_compare.cc b/cpp/src/util/string_compare.cc
index c63b138..31a7534 100644
--- a/cpp/src/util/string_compare.cc
+++ b/cpp/src/util/string_compare.cc

@@ -12,20 +12,54 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "re2ptr.h"  // Must be the first #include statement!
-
 #include "string_compare.h"
 
 #include <libaddressinput/util/basictypes.h>
 
+#include <cassert>
 #include <string>
 
+#include <re2/re2.h>
+
+#include "lru_cache_using_std.h"
+
+// RE2 uses type string, which is not necessarily the same as type std::string.
+// In order to create objects of the correct type, to be able to pass pointers
+// to these objects to RE2, the function that does that is defined inside an
+// unnamed namespace inside the re2 namespace. Oh, my ...
+namespace re2 {
+namespace {
+
+// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function
+// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would
+// be a case-insensitive match to the string. This is far too expensive to do
+// repeatedly, so the function is only ever called through an LRU cache.
+std::string ComputeMinPossibleMatch(const std::string& str) {
+  string min, max;  // N.B.: RE2 type string!
+
+  RE2::Options options;
+  options.set_literal(true);
+  options.set_case_sensitive(false);
+  RE2 matcher(str, options);
+
+  bool success = matcher.PossibleMatchRange(&min, &max, str.size());
+  assert(success);
+  (void)success;  // Prevent unused variable if assert() is optimized away.
+
+  return min;
+}
+
+}  // namespace
+}  // namespace re2
+
 namespace i18n {
 namespace addressinput {
 
 class StringCompare::Impl {
+  enum { MAX_CACHE_SIZE = 1 << 15 };
+
  public:
-  Impl() {
+  Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) {
     options_.set_literal(true);
     options_.set_case_sensitive(false);
   }
@@ -37,8 +71,15 @@
     return RE2::FullMatch(a, matcher);
   }
 
+  bool NaturalLess(const std::string& a, const std::string& b) const {
+    const std::string& min_a(min_possible_match_(a));
+    const std::string& min_b(min_possible_match_(b));
+    return min_a < min_b;
+  }
+
  private:
   RE2::Options options_;
+  mutable lru_cache_using_std<std::string, std::string> min_possible_match_;
 
   DISALLOW_COPY_AND_ASSIGN(Impl);
 };
@@ -52,5 +93,10 @@
   return impl_->NaturalEquals(a, b);
 }
 
+bool StringCompare::NaturalLess(const std::string& a,
+                                const std::string& b) const {
+  return impl_->NaturalLess(a, b);
+}
+
 }  // namespace addressinput
 }  // namespace i18n

diff --git a/cpp/src/util/string_compare.h b/cpp/src/util/string_compare.h
index 9d530fa..ae680dd 100644
--- a/cpp/src/util/string_compare.h
+++ b/cpp/src/util/string_compare.h

@@ -33,6 +33,12 @@
   // default implementation just does case insensitive string matching.
   bool NaturalEquals(const std::string& a, const std::string& b) const;
 
+  // Comparison function for use with the STL analogous to NaturalEquals().
+  // Libaddressinput itself isn't really concerned about how this is done, as
+  // long as it conforms to the STL requirements on less<> predicates. This
+  // default implementation is VERY SLOW! Must be replaced if you need speed.
+  bool NaturalLess(const std::string& a, const std::string& b) const;
+
  private:
   class Impl;
   scoped_ptr<Impl> impl_;

diff --git a/cpp/test/address_ui_test.cc b/cpp/test/address_ui_test.cc
index b36ea8c..c23d205 100644
--- a/cpp/test/address_ui_test.cc
+++ b/cpp/test/address_ui_test.cc

@@ -37,6 +37,8 @@
 using i18n::addressinput::RECIPIENT;
 using i18n::addressinput::STREET_ADDRESS;
 
+static const char kUiLanguageTag[] = "en";
+
 // Returns testing::AssertionSuccess if the |components| are valid. Uses
 // |region_code| in test failure messages.
 testing::AssertionResult ComponentsAreValid(
@@ -81,14 +83,15 @@
 // code.
 TEST_P(AddressUiTest, ComponentsAreValid) {
   EXPECT_TRUE(ComponentsAreValid(BuildComponents(
-      GetParam(), localization_, &best_address_language_tag_)));
+      GetParam(), localization_, kUiLanguageTag, &best_address_language_tag_)));
 }
 
 // Verifies that BuildComponents() returns at most one input field of each type.
 TEST_P(AddressUiTest, UniqueFieldTypes) {
   std::set<AddressField> fields;
   const std::vector<AddressUiComponent>& components =
-      BuildComponents(GetParam(), localization_, &best_address_language_tag_);
+      BuildComponents(GetParam(), localization_, kUiLanguageTag,
+                      &best_address_language_tag_);
   for (std::vector<AddressUiComponent>::const_iterator it = components.begin();
        it != components.end(); ++it) {
     EXPECT_TRUE(fields.insert(it->field).second);
@@ -103,8 +106,9 @@
 // Verifies that BuildComponents() returns an empty vector for an invalid region
 // code.
 TEST_F(AddressUiTest, InvalidRegionCodeReturnsEmptyVector) {
-  EXPECT_TRUE(BuildComponents("INVALID-REGION-CODE", localization_,
-                              &best_address_language_tag_).empty());
+  EXPECT_TRUE(BuildComponents(
+      "INVALID-REGION-CODE", localization_, kUiLanguageTag,
+      &best_address_language_tag_).empty());
 }
 
 // Test data for determining the best language tag and whether the right format
@@ -145,9 +149,10 @@
 std::string GetterStub(int) { return std::string(); }
 
 TEST_P(BestAddressLanguageTagTest, CorrectBestAddressLanguageTag) {
-  localization_.SetGetter(&GetterStub, GetParam().ui_language_tag);
+  localization_.SetGetter(&GetterStub);
   const std::vector<AddressUiComponent>& components = BuildComponents(
-      GetParam().region_code, localization_, &best_address_language_tag_);
+      GetParam().region_code, localization_, GetParam().ui_language_tag,
+      &best_address_language_tag_);
   EXPECT_EQ(GetParam().expected_best_address_language_tag,
             best_address_language_tag_);
   ASSERT_FALSE(components.empty());

diff --git a/cpp/test/address_validator_test.cc b/cpp/test/address_validator_test.cc
index 5250502..79195e3 100644
--- a/cpp/test/address_validator_test.cc
+++ b/cpp/test/address_validator_test.cc

@@ -332,4 +332,87 @@
   EXPECT_EQ(expected_, problems_);
 }
 
+TEST_P(AddressValidatorTest, ValidKanjiAddressJP) {
+  address_.region_code = "JP";
+  address_.administrative_area =
+      "\xE5\xBE\xB3\xE5\xB3\xB6\xE7\x9C\x8C"; /* 徳島県 */
+  address_.locality =
+      "\xE5\xBE\xB3\xE5\xB3\xB6\xE5\xB8\x82";  /* 徳島市 */
+  address_.postal_code = "770-0847";
+  address_.address_line.push_back("...");
+  address_.language_code = "ja";
+
+  ASSERT_NO_FATAL_FAILURE(Validate());
+  ASSERT_TRUE(called_);
+  EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidLatinAddressJP) {
+  // Skip this test case when using the OndemandSupplier, which depends on the
+  // address metadata server to map Latin script names to local script names.
+  if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+  address_.region_code = "JP";
+  address_.administrative_area = "Tokushima";
+  address_.locality = "Tokushima";
+  address_.postal_code = "770-0847";
+  address_.address_line.push_back("...");
+  address_.language_code = "ja-Latn";
+
+  ASSERT_NO_FATAL_FAILURE(Validate());
+  ASSERT_TRUE(called_);
+  EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidAddressBR) {
+  // Skip this test case when using the OndemandSupplier, which depends on the
+  // address metadata server to map natural language names to metadata IDs.
+  if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+  address_.region_code = "BR";
+  address_.administrative_area = "S\xC3\xA3o Paulo";  /* São Paulo */
+  address_.locality = "Presidente Prudente";
+  address_.postal_code = "19063-008";
+  address_.address_line.push_back("Rodovia Raposo Tavares, 6388-6682");
+  address_.language_code = "pt";
+
+  ASSERT_NO_FATAL_FAILURE(Validate());
+  ASSERT_TRUE(called_);
+  EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidAddressCA_en) {
+  // Skip this test case when using the OndemandSupplier, which depends on the
+  // address metadata server to map natural language names to metadata IDs.
+  if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+  address_.region_code = "CA";
+  address_.administrative_area = "New Brunswick";
+  address_.locality = "Saint John County";
+  address_.postal_code = "E2L 4Z6";
+  address_.address_line.push_back("...");
+  address_.language_code = "en";
+
+  ASSERT_NO_FATAL_FAILURE(Validate());
+  ASSERT_TRUE(called_);
+  EXPECT_EQ(expected_, problems_);
+}
+
+TEST_P(AddressValidatorTest, ValidAddressCA_fr) {
+  // Skip this test case when using the OndemandSupplier, which depends on the
+  // address metadata server to map natural language names to metadata IDs.
+  if (GetParam() == &OndemandValidatorWrapper::Build) return;
+
+  address_.region_code = "CA";
+  address_.administrative_area = "Nouveau-Brunswick";
+  address_.locality = "Comt\xC3\xA9 de Saint-Jean";  /* Comté de Saint-Jean */
+  address_.postal_code = "E2L 4Z6";
+  address_.address_line.push_back("...");
+  address_.language_code = "fr";
+
+  ASSERT_NO_FATAL_FAILURE(Validate());
+  ASSERT_TRUE(called_);
+  EXPECT_EQ(expected_, problems_);
+}
+
 }  // namespace

diff --git a/cpp/test/localization_test.cc b/cpp/test/localization_test.cc
index d1c4a9d..2ed4b07 100644
--- a/cpp/test/localization_test.cc
+++ b/cpp/test/localization_test.cc

@@ -54,12 +54,10 @@
 
 // Verifies that a custom message getter can be used.
 static const char kValidMessage[] = "Data";
-static const char kValidLanguageTag[] = "tlh";
 std::string GetValidMessage(int message_id) { return kValidMessage; }
 TEST_P(LocalizationTest, ValidStringGetterCanBeUsed) {
-  localization_.SetGetter(&GetValidMessage, kValidLanguageTag);
+  localization_.SetGetter(&GetValidMessage);
   EXPECT_EQ(kValidMessage, localization_.GetString(GetParam()));
-  EXPECT_EQ(kValidLanguageTag, localization_.GetLanguage());
 }
 
 // Verifies that the default language for messages does not have empty strings.
@@ -78,13 +76,6 @@
             localization_.GetString(GetParam()).find(std::string(2U, ' ')));
 }
 
-// Verifies that the default string is English.
-TEST_P(LocalizationTest, DefaultStringIsEnglish) {
-  std::string default_string = localization_.GetString(GetParam());
-  localization_.SetLanguage("en");
-  EXPECT_EQ(default_string, localization_.GetString(GetParam()));
-}
-
 // Tests all message identifiers.
 INSTANTIATE_TEST_CASE_P(
     AllMessages, LocalizationTest,
@@ -130,24 +121,17 @@
   EXPECT_TRUE(localization_.GetString(INVALID_MESSAGE_ID).empty());
 }
 
-// Verifies that the default language is English.
-TEST_F(LocalizationTest, DefaultLanguageIsEnglish) {
-  EXPECT_EQ("en", localization_.GetLanguage());
-}
-
 TEST(LocalizationGetErrorMessageTest, MissingRequiredPostalCode) {
   Localization localization;
   AddressData address;
   address.region_code = "CH";
-  EXPECT_EQ(std::string("You must provide a postal code, for example") +
-            " 2544,1211,1556,3030." +
+  EXPECT_EQ(std::string("You must provide a postal code, for example 2544.") +
             " Don't know your postal code? Find it out" +
             " <a href=\"http://www.post.ch/db/owa/pv_plz_pack/pr_main\">" +
             "here</a>.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          MISSING_REQUIRED_FIELD, true, true));
-  EXPECT_EQ(std::string("You must provide a postal code, for example") +
-            " 2544,1211,1556,3030.",
+  EXPECT_EQ("You must provide a postal code, for example 2544.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          MISSING_REQUIRED_FIELD, true, false));
   EXPECT_EQ("You can't leave this empty.",
@@ -162,15 +146,13 @@
   Localization localization;
   AddressData address;
   address.region_code = "US";
-  EXPECT_EQ(std::string("You must provide a ZIP code, for example") +
-            " 95014,22162-1010." +
+  EXPECT_EQ(std::string("You must provide a ZIP code, for example 95014.") +
             " Don't know your ZIP code? Find it out" +
             " <a href=\"https://tools.usps.com/go/ZipLookupAction!" +
             "input.action\">here</a>.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          MISSING_REQUIRED_FIELD, true, true));
-  EXPECT_EQ(std::string("You must provide a ZIP code, for example") +
-            " 95014,22162-1010.",
+  EXPECT_EQ("You must provide a ZIP code, for example 95014.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          MISSING_REQUIRED_FIELD, true, false));
   EXPECT_EQ("You can't leave this empty.",
@@ -342,16 +324,14 @@
   AddressData address;
   address.region_code = "CH";
   EXPECT_EQ(std::string("This postal code format is not recognized. Example ") +
-            "of a valid postal code:" +
-            " 2544,1211,1556,3030." +
+            "of a valid postal code: 2544." +
             " Don't know your postal code? Find it out" +
             " <a href=\"http://www.post.ch/db/owa/pv_plz_pack/pr_main\">" +
             "here</a>.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          INVALID_FORMAT, true, true));
   EXPECT_EQ(std::string("This postal code format is not recognized. Example ") +
-            "of a valid postal code:" +
-            " 2544,1211,1556,3030.",
+            "of a valid postal code: 2544.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          INVALID_FORMAT, true, false));
   EXPECT_EQ("This postal code format is not recognized.",
@@ -367,16 +347,14 @@
   AddressData address;
   address.region_code = "US";
   EXPECT_EQ(std::string("This ZIP code format is not recognized. Example of ") +
-            "a valid ZIP code:" +
-            " 95014,22162-1010." +
+            "a valid ZIP code: 95014." +
             " Don't know your ZIP code? Find it out" +
             " <a href=\"https://tools.usps.com/go/ZipLookupAction!" +
             "input.action\">here</a>.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          INVALID_FORMAT, true, true));
   EXPECT_EQ(std::string("This ZIP code format is not recognized. Example of ") +
-            "a valid ZIP code:" +
-            " 95014,22162-1010.",
+            "a valid ZIP code: 95014.",
             localization.GetErrorMessage(address, POSTAL_CODE,
                                          INVALID_FORMAT, true, false));
   EXPECT_EQ("This ZIP code format is not recognized.",

diff --git a/cpp/test/util/string_compare_test.cc b/cpp/test/util/string_compare_test.cc
index d5990d9..8f8d4d5 100644
--- a/cpp/test/util/string_compare_test.cc
+++ b/cpp/test/util/string_compare_test.cc

@@ -25,14 +25,19 @@
 struct TestCase {
   TestCase(const std::string& left,
            const std::string& right,
-           bool should_be_equal)
-      : left(left), right(right), should_be_equal(should_be_equal) {}
+           bool should_be_equal,
+           bool should_be_less)  // Expected NaturalLess(left, right) result.
+      : left(left),
+        right(right),
+        should_be_equal(should_be_equal),
+        should_be_less(should_be_less) {}
 
   ~TestCase() {}
 
   std::string left;
   std::string right;
   bool should_be_equal;
+  bool should_be_less;  // Checked by the CorrectLess test via NaturalLess().
 };
 
 class StringCompareTest : public testing::TestWithParam<TestCase> {
@@ -48,13 +53,25 @@
   }
 }
 
+TEST_P(StringCompareTest, CorrectLess) {  // NaturalLess vs. should_be_less.
+  if (GetParam().should_be_less) {
+    EXPECT_TRUE(compare_.NaturalLess(GetParam().left, GetParam().right));
+  } else {  // Covers both the "equal" and the "greater" parameter sets.
+    EXPECT_FALSE(compare_.NaturalLess(GetParam().left, GetParam().right));
+  }
+}
+
 INSTANTIATE_TEST_CASE_P(
     Comparisons, StringCompareTest,
-    testing::Values(TestCase("foo", "foo", true),
-                    TestCase("foo", "FOO", true),
-                    TestCase("bar", "foo", false),
-                    TestCase("강원도", "강원도", true),
-                    TestCase("강원도", "대구광역시", false),
-                    TestCase("ZÜRICH", "zürich", true)));
+    testing::Values(TestCase("foo", "foo", true, false),  // Identical.
+                    TestCase("foo", "FOO", true, false),  // Case differs only.
+                    TestCase("bar", "foo", false, true),  // Left sorts first.
+                    TestCase("강원도", "강원도", true, false),  // Identical.
+                    TestCase("강원도", "대구광역시", false, true),  // Left first.
+                    TestCase("ZÜRICH", "zürich", true, false),  // Case only.
+                    TestCase("абв", "где", false, true),  // Left sorts first.
+                    TestCase("абв", "ГДЕ", false, true),  // Right is uppercase.
+                    TestCase("где", "абв", false, false),  // Operands swapped.
+                    TestCase("где", "АБВ", false, false)));  // Swapped + case.
 
 }  // namespace

diff --git a/cpp/test/validation_task_test.cc b/cpp/test/validation_task_test.cc
index e8136f3..011a978 100644
--- a/cpp/test/validation_task_test.cc
+++ b/cpp/test/validation_task_test.cc

@@ -98,14 +98,14 @@
         &problems_,
         *validated_);
 
-    Supplier::RuleHierarchy* hierarchy = new Supplier::RuleHierarchy();
+    Supplier::RuleHierarchy hierarchy;
 
     for (size_t i = 0; i < arraysize(json_) && json_[i] != NULL; ++i) {
       ASSERT_TRUE(rule[i].ParseSerializedRule(json_[i]));
-      hierarchy->rule[i] = &rule[i];
+      hierarchy.rule[i] = &rule[i];
     }
 
-    (*task->supplied_)(success_, *task->lookup_key_, *hierarchy);
+    (*task->supplied_)(success_, *task->lookup_key_, hierarchy);
   }
 
   const char* json_[arraysize(LookupKey::kHierarchy)];
commit	6ce3a9ad00160cd58574b6bca6d2220c4dbfc83e	[log] [tgz]
author	Torne (Richard Coles) <torne@google.com>	Wed Jun 25 10:31:36 2014 +0100
committer	Torne (Richard Coles) <torne@google.com>	Wed Jun 25 10:31:36 2014 +0100
tree	bd4c283a39b6659d25c49870a045ff73b049eebc
parent	b8347ad8ead685b8afe0ff329ae047f17c7b817c [diff]
parent	f7ddeee545f03c948074c921c4648807d90227ae [diff]