Improve efficiency of PreloadSupplier and RegionDataBuilder.

before: RegionDataBuilderTest.BuildCnRegionTree (8485 ms)
after: RegionDataBuilderTest.BuildCnRegionTree (5765 ms)
 => 32% improvement.

(Built using 'g++ -g -pg -O0')

R=roubert@google.com

Review URL: https://codereview.appspot.com/111390043

git-svn-id: http://libaddressinput.googlecode.com/svn/trunk@322 38ededc0-08b8-5190-f2ac-b31f878777ad
diff --git a/cpp/include/libaddressinput/preload_supplier.h b/cpp/include/libaddressinput/preload_supplier.h
index 740b4d0..a4025cb 100644
--- a/cpp/include/libaddressinput/preload_supplier.h
+++ b/cpp/include/libaddressinput/preload_supplier.h
@@ -20,6 +20,7 @@
 #include <libaddressinput/util/basictypes.h>
 #include <libaddressinput/util/scoped_ptr.h>
 
+#include <map>
 #include <set>
 #include <string>
 #include <vector>
@@ -80,6 +81,11 @@
   // Calls |loaded| when the loading has finished.
   void LoadRules(const std::string& region_code, const Callback& loaded);
 
+  // Returns a mapping of lookup keys to rules. Should be called only when
+  // IsLoaded() returns true for the |region_code|.
+  const std::map<std::string, const Rule*>& GetRulesForRegion(
+      const std::string& region_code) const;
+
   bool IsLoaded(const std::string& region_code) const;
   bool IsPending(const std::string& region_code) const;
 
@@ -93,6 +99,7 @@
   std::set<std::string> pending_;
   const scoped_ptr<IndexMap> rule_index_;
   std::vector<const Rule*> rule_storage_;
+  std::map<std::string, std::map<std::string, const Rule*> > region_rules_;
 
   DISALLOW_COPY_AND_ASSIGN(PreloadSupplier);
 };
diff --git a/cpp/src/preload_supplier.cc b/cpp/src/preload_supplier.cc
index 26f7e2b..2557e33 100644
--- a/cpp/src/preload_supplier.cc
+++ b/cpp/src/preload_supplier.cc
@@ -72,16 +72,19 @@
          const Retriever& retriever,
          std::set<std::string>* pending,
          IndexMap* rule_index,
-         std::vector<const Rule*>* rule_storage)
+         std::vector<const Rule*>* rule_storage,
+         std::map<std::string, const Rule*>* region_rules)
       : region_code_(region_code),
         loaded_(loaded),
         pending_(pending),
         rule_index_(rule_index),
         rule_storage_(rule_storage),
+        region_rules_(region_rules),
         retrieved_(BuildCallback(this, &Helper::OnRetrieved)) {
     assert(pending_ != NULL);
     assert(rule_index_ != NULL);
     assert(rule_storage_ != NULL);
+    assert(region_rules_ != NULL);
     assert(retrieved_ != NULL);
     pending_->insert(key);
     retriever.Retrieve(key, *retrieved_);
@@ -103,6 +106,14 @@
     std::string id;
     std::vector<const Rule*> sub_rules;
 
+    IndexMap::iterator last_index_it = rule_index_->end();
+    IndexMap::iterator last_latin_it = rule_index_->end();
+    std::map<std::string, const Rule*>::iterator last_region_it =
+        region_rules_->end();
+
+    IndexMap::const_iterator hints[arraysize(LookupKey::kHierarchy) - 1];
+    std::fill(hints, hints + arraysize(hints), rule_index_->end());
+
     if (!success) {
       goto callback;
     }
@@ -112,19 +123,17 @@
       goto callback;
     }
 
-    for (std::vector<std::string>::const_iterator
-         it = json.GetKeys().begin(); it != json.GetKeys().end(); ++it) {
-      const Json* value;
-      if (!json.GetDictionaryValueForKey(*it, &value)) {
-        success = false;
-        goto callback;
-      }
-
+    for (std::vector<const Json*>::const_iterator
+         it = json.GetSubDictionaries().begin();
+         it != json.GetSubDictionaries().end();
+         ++it) {
+      const Json* value = *it;
+      assert(value != NULL);
       if (!value->GetStringValueForKey("id", &id)) {
         success = false;
         goto callback;
       }
-      assert(*it == id);  // Sanity check.
+      assert(!id.empty());
 
       size_t depth = std::count(id.begin(), id.end(), '/') - 1;
       assert(depth < arraysize(LookupKey::kHierarchy));
@@ -143,11 +152,15 @@
         sub_rules.push_back(rule);
       }
 
-      // Add the ID of this Rule object to the rule index.
-      std::pair<IndexMap::iterator, bool> result =
-          rule_index_->insert(std::make_pair(id, rule));
-      assert(result.second);
-      (void)result;  // Prevent unused variable if assert() is optimized away.
+      // Add the ID of this Rule object to the rule index with natural string
+      // comparison for keys.
+      last_index_it =
+          rule_index_->insert(last_index_it, std::make_pair(id, rule));
+
+      // Add the ID of this Rule object to the region-specific rule index with
+      // exact string comparison for keys.
+      last_region_it =
+          region_rules_->insert(last_region_it, std::make_pair(id, rule));
 
       ++rule_count;
     }
@@ -179,9 +192,12 @@
         }
         parent_id.resize(pos);
 
-        IndexMap::const_iterator jt = rule_index_->find(parent_id);
-        assert(jt != rule_index_->end());
-        hierarchy.push(jt->second);
+        IndexMap::const_iterator* const hint = &hints[hierarchy.size() - 1];
+        if (*hint == rule_index_->end() || (*hint)->first != parent_id) {
+          *hint = rule_index_->find(parent_id);
+        }
+        assert(*hint != rule_index_->end());
+        hierarchy.push((*hint)->second);
       }
 
       std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1));
@@ -217,13 +233,15 @@
         }
       }
 
-      rule_index_->insert(std::make_pair(human_id, *it));
+      last_index_it =
+          rule_index_->insert(last_index_it, std::make_pair(human_id, *it));
 
       // Add the Latin script ID, if a Latin script name could be found for
       // every part of the ID.
       if (std::count(human_id.begin(), human_id.end(), '/') ==
           std::count(latin_id.begin(), latin_id.end(), '/')) {
-        rule_index_->insert(std::make_pair(latin_id, *it));
+        last_latin_it =
+            rule_index_->insert(last_latin_it, std::make_pair(latin_id, *it));
       }
     }
 
@@ -237,6 +255,7 @@
   std::set<std::string>* const pending_;
   IndexMap* const rule_index_;
   std::vector<const Rule*>* const rule_storage_;
+  std::map<std::string, const Rule*>* const region_rules_;
   const scoped_ptr<const Retriever::Callback> retrieved_;
 
   DISALLOW_COPY_AND_ASSIGN(Helper);
@@ -258,7 +277,8 @@
     : retriever_(new Retriever(validation_data_url, downloader, storage)),
       pending_(),
       rule_index_(new IndexMap),
-      rule_storage_() {}
+      rule_storage_(),
+      region_rules_() {}
 
 PreloadSupplier::~PreloadSupplier() {
   for (std::vector<const Rule*>::const_iterator
@@ -303,7 +323,14 @@
       *retriever_,
       &pending_,
       rule_index_.get(),
-      &rule_storage_);
+      &rule_storage_,
+      &region_rules_[region_code]);
+}
+
+const std::map<std::string, const Rule*>& PreloadSupplier::GetRulesForRegion(
+    const std::string& region_code) const {
+  assert(IsLoaded(region_code));
+  return region_rules_.find(region_code)->second;
 }
 
 bool PreloadSupplier::IsLoaded(const std::string& region_code) const {
diff --git a/cpp/src/region_data_builder.cc b/cpp/src/region_data_builder.cc
index cf49c71..b600d09 100644
--- a/cpp/src/region_data_builder.cc
+++ b/cpp/src/region_data_builder.cc
@@ -17,6 +17,7 @@
 #include <libaddressinput/address_data.h>
 #include <libaddressinput/preload_supplier.h>
 #include <libaddressinput/region_data.h>
+#include <libaddressinput/util/basictypes.h>
 
 #include <cassert>
 #include <cstddef>
@@ -34,59 +35,91 @@
 
 namespace {
 
-// Does not take ownership of |supplier| or |parent_region|, neither of which is
-// allowed to be NULL.
-void BuildRegionTreeRecursively(PreloadSupplier* supplier,
-                                const LookupKey& parent_key,
-                                RegionData* parent_region,
-                                const std::vector<std::string>& keys,
-                                bool prefer_latin_name) {
-  assert(supplier != NULL);
+// The maximum depth of lookup keys.
+static const size_t kLookupKeysMaxDepth = arraysize(LookupKey::kHierarchy) - 1;
+
+// Does not take ownership of |parent_region|, which is not allowed to be NULL.
+void BuildRegionTreeRecursively(
+    const std::map<std::string, const Rule*>& rules,
+    std::map<std::string, const Rule*>::const_iterator hint,
+    const LookupKey& parent_key,
+    RegionData* parent_region,
+    const std::vector<std::string>& keys,
+    bool prefer_latin_name,
+    size_t region_max_depth) {
   assert(parent_region != NULL);
 
   LookupKey lookup_key;
   for (std::vector<std::string>::const_iterator key_it = keys.begin();
        key_it != keys.end(); ++key_it) {
     lookup_key.FromLookupKey(parent_key, *key_it);
-    const Rule* rule = supplier->GetRule(lookup_key);
-    if (rule == NULL) {
-      return;
+    const std::string& lookup_key_string =
+        lookup_key.ToKeyString(kLookupKeysMaxDepth);
+
+    ++hint;
+    if (hint == rules.end() || hint->first != lookup_key_string) {
+      hint = rules.find(lookup_key_string);
+      if (hint == rules.end()) {
+        return;
+      }
     }
+
+    const Rule* rule = hint->second;
+    assert(rule != NULL);
+
     const std::string& local_name = rule->GetName().empty()
         ? *key_it : rule->GetName();
     const std::string& name =
         prefer_latin_name && !rule->GetLatinName().empty()
             ? rule->GetLatinName() : local_name;
     RegionData* region = parent_region->AddSubRegion(*key_it, name);
-    if (!rule->GetSubKeys().empty()) {
-      BuildRegionTreeRecursively(
-          supplier, lookup_key, region, rule->GetSubKeys(), prefer_latin_name);
+
+    if (!rule->GetSubKeys().empty() &&
+        region_max_depth > parent_key.GetDepth()) {
+      BuildRegionTreeRecursively(rules,
+                                 hint,
+                                 lookup_key,
+                                 region,
+                                 rule->GetSubKeys(),
+                                 prefer_latin_name,
+                                 region_max_depth);
     }
   }
 }
 
-// Does not take ownership of |supplier|, which cannot be NULL. The caller owns
-// the result.
-RegionData* BuildRegion(PreloadSupplier* supplier,
+// The caller owns the result.
+RegionData* BuildRegion(const std::map<std::string, const Rule*>& rules,
                         const std::string& region_code,
                         const Language& language) {
-  assert(supplier != NULL);
-
   AddressData address;
   address.region_code = region_code;
 
   LookupKey lookup_key;
   lookup_key.FromAddress(address);
 
-  const Rule* const rule = supplier->GetRule(lookup_key);
+  std::map<std::string, const Rule*>::const_iterator hint =
+      rules.find(lookup_key.ToKeyString(kLookupKeysMaxDepth));
+  assert(hint != rules.end());
+
+  const Rule* rule = hint->second;
   assert(rule != NULL);
 
   RegionData* region = new RegionData(region_code);
-  BuildRegionTreeRecursively(supplier,
-                             lookup_key,
-                             region,
-                             rule->GetSubKeys(),
-                             language.has_latin_script);
+
+  // If there're sub-keys for field X, but field X is not used in this region
+  // code, then these sub-keys are skipped over. For example, CH has sub-keys
+  // for field ADMIN_AREA, but CH does not use ADMIN_AREA field.
+  size_t region_max_depth =
+      RegionDataConstants::GetMaxLookupKeyDepth(region_code);
+  if (region_max_depth > 0) {
+    BuildRegionTreeRecursively(rules,
+                               hint,
+                               lookup_key,
+                               region,
+                               rule->GetSubKeys(),
+                               language.has_latin_script,
+                               region_max_depth);
+  }
 
   return region;
 }
@@ -139,9 +172,11 @@
   LanguageRegionMap::const_iterator language_it =
       region_it->second->find(best_language.tag);
   if (language_it == region_it->second->end()) {
+    const std::map<std::string, const Rule*>& rules =
+        supplier_->GetRulesForRegion(region_code);
     language_it =
         region_it->second->insert(std::make_pair(best_language.tag,
-                                                 BuildRegion(supplier_,
+                                                 BuildRegion(rules,
                                                              region_code,
                                                              best_language)))
             .first;
diff --git a/cpp/src/util/json.cc b/cpp/src/util/json.cc
index ef3056b..730479c 100644
--- a/cpp/src/util/json.cc
+++ b/cpp/src/util/json.cc
@@ -19,9 +19,7 @@
 
 #include <cassert>
 #include <cstddef>
-#include <map>
 #include <string>
-#include <utility>
 #include <vector>
 
 #include <rapidjson/document.h>
@@ -40,27 +38,31 @@
       : document_(new Document),
         value_(document_.get()),
         dictionaries_(),
-        keys_(),
         valid_(false) {
     document_->Parse<kParseValidateEncodingFlag>(json.c_str());
     valid_ = !document_->HasParseError() && document_->IsObject();
-    if (valid_) {
-      BuildKeyList();
-    }
   }
 
   ~JsonImpl() {
-    for (std::map<std::string, const Json*>::const_iterator
-         it = dictionaries_.begin();
-         it != dictionaries_.end();
-         ++it) {
-      delete it->second;
+    for (std::vector<const Json*>::const_iterator it = dictionaries_.begin();
+         it != dictionaries_.end(); ++it) {
+      delete *it;
     }
   }
 
   bool valid() const { return valid_; }
 
-  const std::vector<std::string>& GetKeys() const { return keys_; }
+  const std::vector<const Json*>& GetSubDictionaries() {
+    if (dictionaries_.empty()) {
+      for (Value::ConstMemberIterator member = value_->MemberBegin();
+           member != value_->MemberEnd(); ++member) {
+        if (member->value.IsObject()) {
+          dictionaries_.push_back(new Json(new JsonImpl(&member->value)));
+        }
+      }
+    }
+    return dictionaries_;
+  }
 
   bool GetStringValueForKey(const std::string& key, std::string* value) const {
     assert(value != NULL);
@@ -75,48 +77,15 @@
     return true;
   }
 
-  bool GetDictionaryValueForKey(const std::string& key, const Json** value) {
-    assert(value != NULL);
-
-    std::map<std::string, const Json*>::const_iterator dict_it =
-        dictionaries_.find(key);
-    if (dict_it != dictionaries_.end()) {
-      *value = dict_it->second;
-      return true;
-    }
-
-    Value::ConstMemberIterator member = value_->FindMember(key.c_str());
-    if (member == NULL || !member->value.IsObject()) {
-      return false;
-    }
-
-    std::pair<std::map<std::string, const Json*>::iterator, bool> result =
-        dictionaries_.insert(
-            std::make_pair(key, new Json(new JsonImpl(&member->value))));
-    assert(result.second);
-    *value = result.first->second;
-    return true;
-  }
-
  private:
   // Does not take ownership of |value|.
   explicit JsonImpl(const Value* value)
       : document_(),
         value_(value),
         dictionaries_(),
-        keys_(),
         valid_(true) {
     assert(value_ != NULL);
     assert(value_->IsObject());
-    BuildKeyList();
-  }
-
-  void BuildKeyList() {
-    assert(keys_.empty());
-    for (Value::ConstMemberIterator member = value_->MemberBegin();
-         member != value_->MemberEnd(); ++member) {
-      keys_.push_back(member->name.GetString());
-    }
   }
 
   // An owned JSON document. Can be NULL if the JSON document is not owned.
@@ -125,11 +94,8 @@
   // A JSON document that is not owned. Cannot be NULL. Can point to document_.
   const Value* const value_;
 
-  // Owned JSON objects.
-  std::map<std::string, const Json*> dictionaries_;
-
-  // The list of keys with values in the JSON object.
-  std::vector<std::string> keys_;
+  // Owned JSON objects of sub-dictionaries.
+  std::vector<const Json*> dictionaries_;
 
   // True if the JSON object was parsed successfully.
   bool valid_;
@@ -150,9 +116,9 @@
   return impl_ != NULL;
 }
 
-const std::vector<std::string>& Json::GetKeys() const {
+const std::vector<const Json*>& Json::GetSubDictionaries() const {
   assert(impl_ != NULL);
-  return impl_->GetKeys();
+  return impl_->GetSubDictionaries();
 }
 
 bool Json::GetStringValueForKey(const std::string& key,
@@ -161,12 +127,6 @@
   return impl_->GetStringValueForKey(key, value);
 }
 
-bool Json::GetDictionaryValueForKey(const std::string& key,
-                                    const Json** value) const {
-  assert(impl_ != NULL);
-  return impl_->GetDictionaryValueForKey(key, value);
-}
-
 Json::Json(JsonImpl* impl) : impl_(impl) {}
 
 }  // namespace addressinput
diff --git a/cpp/src/util/json.h b/cpp/src/util/json.h
index 42cd331..1aac803 100644
--- a/cpp/src/util/json.h
+++ b/cpp/src/util/json.h
@@ -39,9 +39,10 @@
   // object.
   bool ParseObject(const std::string& json);
 
-  // Returns the list of keys in the parsed JSON. The JSON object must be parsed
-  // successfully in ParseObject() before invoking this method.
-  const std::vector<std::string>& GetKeys() const;
+  // Returns the list of sub dictionaries. The JSON object must be parsed
+  // successfully in ParseObject() before invoking this method. The caller does
+  // not own the result.
+  const std::vector<const Json*>& GetSubDictionaries() const;
 
   // Returns true if the parsed JSON contains a string value for |key|. Sets
   // |value| to the string value of the |key|. The JSON object must be parsed
@@ -49,13 +50,6 @@
   // parameter should not be NULL.
   bool GetStringValueForKey(const std::string& key, std::string* value) const;
 
-  // Returns true if the parsed JSON contains a dictionary value for |key|.
-  // Points |value| to the dictionary value of the |key|. The JSON object must
-  // be parsed successfully in ParseObject() before invoking this method. The
-  // caller does not own |value|.
-  bool GetDictionaryValueForKey(const std::string& key,
-                                const Json** value) const;
-
  private:
   class JsonImpl;
   friend class JsonImpl;
diff --git a/cpp/test/preload_supplier_test.cc b/cpp/test/preload_supplier_test.cc
index f8e9e23..7181ef2 100644
--- a/cpp/test/preload_supplier_test.cc
+++ b/cpp/test/preload_supplier_test.cc
@@ -121,4 +121,12 @@
   EXPECT_TRUE(rule == NULL);
 }
 
+TEST_F(PreloadSupplierTest, GetRulesForRegion) {
+  supplier_.LoadRules("CN", *loaded_callback_);
+  const std::map<std::string, const Rule*>& rules =
+      supplier_.GetRulesForRegion("CN");
+  EXPECT_TRUE(rules.find("data/CN") != rules.end());
+  EXPECT_LT(1, rules.size());
+}
+
 }  // namespace
diff --git a/cpp/test/util/json_test.cc b/cpp/test/util/json_test.cc
index 2306ac3..594c341 100644
--- a/cpp/test/util/json_test.cc
+++ b/cpp/test/util/json_test.cc
@@ -120,32 +120,35 @@
 TEST(JsonTest, NoDictionaryFound) {
   Json json;
   ASSERT_TRUE(json.ParseObject("{\"key\":\"value\"}"));
-  const Json* sub_json;
-  EXPECT_FALSE(json.GetDictionaryValueForKey("key", &sub_json));
+  EXPECT_TRUE(json.GetSubDictionaries().empty());
 }
 
 TEST(JsonTest, DictionaryFound) {
   Json json;
   ASSERT_TRUE(json.ParseObject("{\"key\":{\"inner_key\":\"value\"}}"));
-  const Json* sub_json = NULL;
-  ASSERT_TRUE(json.GetDictionaryValueForKey("key", &sub_json));
-  ASSERT_TRUE(sub_json != NULL);
+  const std::vector<const Json*>& sub_dicts = json.GetSubDictionaries();
+  ASSERT_EQ(1U, sub_dicts.size());
+
   std::string value;
-  EXPECT_TRUE(sub_json->GetStringValueForKey("inner_key", &value));
+  EXPECT_TRUE(sub_dicts.front()->GetStringValueForKey("inner_key", &value));
   EXPECT_EQ("value", value);
 }
 
-TEST(JsonTest, DictionariesHaveKeys) {
+TEST(JsonTest, DictionariesHaveSubDictionaries) {
   Json json;
-  ASSERT_TRUE(json.ParseObject("{\"key\":{\"inner_key\":\"value\"}}"));
-  std::vector<std::string> expected_keys(1, "key");
-  EXPECT_EQ(expected_keys, json.GetKeys());
+  ASSERT_TRUE(json.ParseObject(
+      "{\"key\":{\"inner_key\":{\"inner_inner_key\":\"value\"}}}"));
+  const std::vector<const Json*>& sub_dicts = json.GetSubDictionaries();
+  ASSERT_EQ(1U, sub_dicts.size());
 
-  const Json* sub_json = NULL;
-  ASSERT_TRUE(json.GetDictionaryValueForKey("key", &sub_json));
-  ASSERT_TRUE(sub_json != NULL);
-  std::vector<std::string> expected_sub_keys(1, "inner_key");
-  EXPECT_EQ(expected_sub_keys, sub_json->GetKeys());
+  const std::vector<const Json*>& sub_sub_dicts =
+      sub_dicts.front()->GetSubDictionaries();
+  ASSERT_EQ(1U, sub_sub_dicts.size());
+
+  std::string value;
+  EXPECT_TRUE(
+      sub_sub_dicts.front()->GetStringValueForKey("inner_inner_key", &value));
+  EXPECT_EQ("value", value);
 }
 
 }  // namespace