String intern table and support for unordered_map

Change-Id: I22d86d060780552675c5d7f14a98ffde480eac82
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 64f0de4..3932b74 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -41,6 +41,7 @@
 	src/dex_instruction.cc \
 	src/dex_verifier.cc \
 	src/heap.cc \
+	src/intern_table.cc \
 	src/jni_compiler.cc \
 	src/jni_internal.cc \
 	src/mark_stack.cc \
@@ -83,6 +84,7 @@
 	src/dex_cache_test.cc \
 	src/dex_file_test.cc \
 	src/dex_instruction_visitor_test.cc \
+	src/intern_table_test.cc \
 	src/jni_compiler_test.cc.arm \
 	src/object_test.cc \
 	src/runtime_test.cc \
diff --git a/src/class_linker.cc b/src/class_linker.cc
index 1bfa009..e1dcbf8 100644
--- a/src/class_linker.cc
+++ b/src/class_linker.cc
@@ -186,19 +186,22 @@
   init_done_ = true;
 }
 
-void ClassLinker::VisitRoots(RootVistor* root_visitor, void* arg) {
+void ClassLinker::VisitRoots(Heap::RootVistor* root_visitor, void* arg) {
   root_visitor(class_roots_, arg);
 
   for (size_t i = 0; i < dex_caches_.size(); i++) {
       root_visitor(dex_caches_[i], arg);
   }
 
-  // TODO: acquire classes_lock_
-  typedef Table::const_iterator It; // TODO: C++0x auto
-  for (It it = classes_.begin(), end = classes_.end(); it != end; ++it) {
-      root_visitor(it->second, arg);
+  {
+    MutexLock mu(classes_lock_);
+    typedef Table::const_iterator It; // TODO: C++0x auto
+    for (It it = classes_.begin(), end = classes_.end(); it != end; ++it) {
+        root_visitor(it->second, arg);
+    }
   }
-  // TODO: release classes_lock_
+
+  intern_table_.VisitRoots(root_visitor, arg);
 
   root_visitor(array_interfaces_, arg);
 }
@@ -736,15 +739,14 @@
 }
 
 bool ClassLinker::InsertClass(Class* klass) {
-  // TODO: acquire classes_lock_
+  MutexLock mu(classes_lock_);
   const StringPiece& key = klass->GetDescriptor();
   Table::iterator it = classes_.insert(std::make_pair(key, klass));
   return ((*it).second == klass);
-  // TODO: release classes_lock_
 }
 
 Class* ClassLinker::LookupClass(const StringPiece& descriptor, ClassLoader* class_loader) {
-  // TODO: acquire classes_lock_
+  MutexLock mu(classes_lock_);
   typedef Table::const_iterator It; // TODO: C++0x auto
   for (It it = classes_.find(descriptor), end = classes_.end(); it != end; ++it) {
     Class* klass = it->second;
@@ -753,7 +755,6 @@
     }
   }
   return NULL;
-  // TODO: release classes_lock_
 }
 
 bool ClassLinker::InitializeClass(Class* klass) {
@@ -963,7 +964,7 @@
 
 bool ClassLinker::InitializeSuperClass(Class* klass) {
   CHECK(klass != NULL);
-  // TODO: assert klass lock is acquired
+  MutexLock mu(classes_lock_);
   if (!klass->IsInterface() && klass->HasSuperClass()) {
     Class* super_class = klass->GetSuperClass();
     if (super_class->GetStatus() != Class::kStatusInitialized) {
@@ -1536,10 +1537,9 @@
   const DexFile::StringId& string_id = dex_file.GetStringId(string_idx);
   int32_t utf16_length = dex_file.GetStringLength(string_id);
   const char* utf8_data = dex_file.GetStringData(string_id);
-  String* new_string = String::AllocFromModifiedUtf8(utf16_length, utf8_data);
-  // TODO: intern the new string
-  referring->GetDexCache()->SetResolvedString(string_idx, new_string);
-  return new_string;
+  String* string = intern_table_.Intern(utf16_length, utf8_data);
+  referring->GetDexCache()->SetResolvedString(string_idx, string);
+  return string;
 }
 
 }  // namespace art
diff --git a/src/class_linker.h b/src/class_linker.h
index 70d268d..15bd054 100644
--- a/src/class_linker.h
+++ b/src/class_linker.h
@@ -7,11 +7,14 @@
 #include <utility>
 #include <vector>
 
-#include "heap.h"
-#include "macros.h"
 #include "dex_file.h"
-#include "thread.h"
+#include "heap.h"
+#include "intern_table.h"
+#include "macros.h"
 #include "object.h"
+#include "thread.h"
+#include "unordered_map.h"
+
 #include "gtest/gtest.h"
 
 namespace art {
@@ -34,24 +37,14 @@
 
   bool InitializeClass(Class* klass);
 
-  Class* LookupClass(const StringPiece& descriptor, ClassLoader* class_loader);
-
-  Class* ResolveClass(const Class* referring,
-                      uint32_t class_idx,
-                      const DexFile& dex_file);
-
-  String* ResolveString(const Class* referring,
-                        uint32_t string_idx,
-                        const DexFile& dex_file);
-
   void RegisterDexFile(const DexFile* dex_file);
 
-  // TODO replace with heap interface
-  typedef void (RootVistor)(Object* root, void* arg);
-  void VisitRoots(RootVistor* root_visitor, void* arg);
+  void VisitRoots(Heap::RootVistor* root_visitor, void* arg);
 
  private:
-  ClassLinker() {}
+  ClassLinker() {
+    classes_lock_ = Mutex::Create("ClassLinker::Lock");
+  }
 
   void Init(const std::vector<DexFile*>& boot_class_path_);
 
@@ -104,6 +97,16 @@
                   Class* klass,
                   Method* dst);
 
+  Class* ResolveClass(const Class* referring,
+                      uint32_t class_idx,
+                      const DexFile& dex_file);
+
+  String* ResolveString(const Class* referring,
+                        uint32_t string_idx,
+                        const DexFile& dex_file);
+
+  Class* LookupClass(const StringPiece& descriptor, ClassLoader* class_loader);
+
   // Inserts a class into the class table.  Returns true if the class
   // was inserted.
   bool InsertClass(Class* klass);
@@ -149,14 +152,11 @@
   // multimap from String::descriptor_ to Class* instances. Results
   // should be compared for a matching Class::descriptor_ and
   // Class::class_loader_.
-  // TODO: unordered_multimap
-  typedef std::multimap<const StringPiece, Class*> Table;
-
+  typedef std::tr1::unordered_multimap<StringPiece, Class*> Table;
   Table classes_;
-
   Mutex* classes_lock_;
 
-  // TODO: classpath
+  InternTable intern_table_;
 
   // indexes into class_roots_
   enum ClassRoot {
diff --git a/src/class_linker_test.cc b/src/class_linker_test.cc
index a9c3e50..5951c0b 100644
--- a/src/class_linker_test.cc
+++ b/src/class_linker_test.cc
@@ -4,6 +4,7 @@
 #include "class_linker.h"
 #include "dex_file.h"
 #include "heap.h"
+
 #include "gtest/gtest.h"
 
 namespace art {
diff --git a/src/dex_file.cc b/src/dex_file.cc
index 35e98db..90893cb 100644
--- a/src/dex_file.cc
+++ b/src/dex_file.cc
@@ -34,7 +34,9 @@
       return ClassPathEntry(dex_file, dex_class_def);
     }
   }
-  return ClassPathEntry(NULL, NULL);
+  // TODO remove reinterpret_cast when issue with -std=gnu++0x host issue resolved
+  return ClassPathEntry(reinterpret_cast<const DexFile*>(NULL),
+                        reinterpret_cast<const DexFile::ClassDef*>(NULL));
 }
 
 DexFile::Closer::~Closer() {}
diff --git a/src/heap.h b/src/heap.h
index 1059fd7..f8a5d23 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -23,6 +23,8 @@
 
   static const size_t kMaximumSize = 64 * MB;
 
+  typedef void (RootVistor)(Object* root, void* arg);
+
   static bool Init() {
     return Init(kStartupSize, kMaximumSize);
   }
diff --git a/src/intern_table.cc b/src/intern_table.cc
new file mode 100644
index 0000000..31749fd
--- /dev/null
+++ b/src/intern_table.cc
@@ -0,0 +1,40 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#include "intern_table.h"
+
+#include "scoped_ptr.h"
+
+namespace art {
+
+InternTable::InternTable() {
+  intern_table_lock_ = Mutex::Create("InternTable::Lock");
+}
+
+void InternTable::VisitRoots(Heap::RootVistor* root_visitor, void* arg) {
+  MutexLock mu(intern_table_lock_);
+  typedef Table::const_iterator It; // TODO: C++0x auto
+  for (It it = intern_table_.begin(), end = intern_table_.end(); it != end; ++it) {
+      root_visitor(it->second, arg);
+  }
+}
+
+String* InternTable::Intern(int32_t utf16_length, const char* utf8_data_in) {
+  scoped_ptr<uint16_t> utf16_data_out(new uint16_t[utf16_length]);
+  String::ConvertModifiedUtf8ToUtf16(utf16_data_out.get(), utf8_data_in);
+  int32_t hash_code = String::ComputeUtf16Hash(utf16_data_out.get(), utf16_length);
+  {
+    MutexLock mu(intern_table_lock_);
+    typedef Table::const_iterator It; // TODO: C++0x auto
+    for (It it = intern_table_.find(hash_code), end = intern_table_.end(); it != end; ++it) {
+      String* string = it->second;
+      if (string->Equals(utf16_data_out.get(), 0, utf16_length)) {
+        return string;
+      }
+    }
+    String* new_string = String::AllocFromUtf16(utf16_length, utf16_data_out.get(), hash_code);
+    intern_table_.insert(std::make_pair(hash_code, new_string));
+    return new_string;
+  }
+}
+
+}  // namespace art
diff --git a/src/intern_table.h b/src/intern_table.h
new file mode 100644
index 0000000..61da28b
--- /dev/null
+++ b/src/intern_table.h
@@ -0,0 +1,27 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#ifndef ART_SRC_INTERN_TABLE_H_
+#define ART_SRC_INTERN_TABLE_H_
+
+#include "unordered_map.h"
+
+#include "heap.h"
+#include "object.h"
+
+namespace art {
+
+class InternTable {
+ public:
+  InternTable();
+  String* Intern(int32_t utf16_length, const char* utf8_data);
+  void VisitRoots(Heap::RootVistor* root_visitor, void* arg);
+
+ private:
+  typedef std::tr1::unordered_multimap<int32_t, String*> Table;
+  Table intern_table_;
+  Mutex* intern_table_lock_;
+};
+
+}  // namespace art
+
+#endif  // ART_SRC_CLASS_LINKER_H_
diff --git a/src/intern_table_test.cc b/src/intern_table_test.cc
new file mode 100644
index 0000000..daae841
--- /dev/null
+++ b/src/intern_table_test.cc
@@ -0,0 +1,31 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#include "intern_table.h"
+
+#include "common_test.h"
+#include "object.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class InternTableTest : public RuntimeTest {};
+
+TEST_F(InternTableTest, Intern) {
+  InternTable intern_table;
+  String* foo_1 = intern_table.Intern(3, "foo");
+  String* foo_2 = intern_table.Intern(3, "foo");
+  String* foo_3 = String::AllocFromAscii("foo");
+  String* bar = intern_table.Intern(3, "bar");
+  EXPECT_TRUE(foo_1->Equals("foo"));
+  EXPECT_TRUE(foo_2->Equals("foo"));
+  EXPECT_TRUE(foo_3->Equals("foo"));
+  EXPECT_TRUE(foo_1 != NULL);
+  EXPECT_TRUE(foo_2 != NULL);
+  EXPECT_EQ(foo_1, foo_2);
+  EXPECT_NE(foo_1, bar);
+  EXPECT_NE(foo_2, bar);
+  EXPECT_NE(foo_3, bar);
+}
+
+}  // namespace art
diff --git a/src/object.h b/src/object.h
index 0e0728d..33db15e 100644
--- a/src/object.h
+++ b/src/object.h
@@ -1096,7 +1096,7 @@
     return GetChars()[i];
   }
 
-  void  SetChar(uint32_t i, uint16_t ch) {
+  void SetChar(uint32_t i, uint16_t ch) {
     CHECK_LT(i, GetLength());
     GetChars()[i] = ch;
   }
@@ -1111,33 +1111,34 @@
     return array_;
   }
 
-  uint32_t GetHashCode() const {
+  int32_t GetHashCode() const {
     return hash_code_;
   }
 
-  uint32_t GetOffset() const {
+  int32_t GetOffset() const {
+    DCHECK_LE(0, offset_);
     return offset_;
   }
 
-  uint32_t GetLength() const {
+  int32_t GetLength() const {
+    DCHECK_LE(0, count_);
     return count_;
   }
 
-  uint16_t CharAt(uint32_t index) const {
+  uint16_t CharAt(int32_t index) const {
     return GetCharArray()->GetChar(index + GetOffset());
   }
 
-  static String* AllocFromUtf16(Class* java_lang_String,
-                                Class* char_array,
-                                int32_t utf16_length,
-                                uint16_t* utf16_data_in) {
-    String* string = Alloc(java_lang_String, char_array, utf16_length);
+  static String* AllocFromUtf16(int32_t utf16_length,
+                                uint16_t* utf16_data_in,
+                                int32_t hash_code) {
+    String* string = Alloc(GetJavaLangString(), GetCharArrayClass(), utf16_length);
     uint16_t* utf16_data_out = string->array_->GetChars();
     // TODO use 16-bit wide memset variant
     for (int i = 0; i < utf16_length; i++ ) {
         utf16_data_out[i] = utf16_data_in[i];
     }
-    string->hash_code_ = ComputeUtf16Hash(utf16_data_out, utf16_length);
+    string->hash_code_ = hash_code;
     return string;
   }
 
@@ -1155,17 +1156,16 @@
   // Creates a String of the given ASCII characters. It is an error to call this
   // using non-ASCII characters as this function assumes one char per byte.
   static String* AllocFromAscii(const char* ascii_data_in) {
-    DCHECK(java_lang_String_ != NULL);
-    DCHECK(char_array_ != NULL);
-    return AllocFromModifiedUtf8(java_lang_String_,
-                                 char_array_,
+    return AllocFromModifiedUtf8(GetJavaLangString(),
+                                 GetCharArrayClass(),
                                  strlen(ascii_data_in),
                                  ascii_data_in);
   }
 
   static String* AllocFromModifiedUtf8(int32_t utf16_length,
                                        const char* utf8_data_in) {
-    return AllocFromModifiedUtf8(java_lang_String_, char_array_, utf16_length, utf8_data_in);
+    return AllocFromModifiedUtf8(GetJavaLangString(), GetCharArrayClass(),
+                                 utf16_length, utf8_data_in);
   }
 
   static void InitClasses(Class* java_lang_String, Class* char_array);
@@ -1253,13 +1253,13 @@
   static int32_t ComputeUtf16Hash(const uint16_t* string_data, size_t string_length) {
     int32_t hash = 0;
     while (string_length--) {
-        hash = hash * 31 + *string_data++;
+      hash = hash * 31 + *string_data++;
     }
     return hash;
   }
 
   bool Equals(const char* modified_utf8) const {
-    for (size_t i = 0; i < GetLength(); ++i) {
+    for (int32_t i = 0; i < GetLength(); ++i) {
       uint16_t ch = GetUtf16FromUtf8(&modified_utf8);
       if (ch == '\0' || ch != CharAt(i)) {
         return false;
@@ -1274,10 +1274,11 @@
   }
 
   bool Equals(const String* that) const {
+    // TODO short circuit on hash_code_
     if (this->GetLength() != that->GetLength()) {
       return false;
     }
-    for (size_t i = 0; i < that->GetLength(); ++i) {
+    for (int32_t i = 0; i < that->GetLength(); ++i) {
       if (this->CharAt(i) != that->CharAt(i)) {
         return false;
       }
@@ -1285,15 +1286,36 @@
     return true;
   }
 
+  bool Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) const {
+    if (this->GetLength() != that_length) {
+      return false;
+    }
+    for (int32_t i = 0; i < that_length; ++i) {
+      if (this->CharAt(i) != that_chars[that_offset + i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   CharArray* array_;
 
-  uint32_t hash_code_;
+  int32_t hash_code_;
 
-  uint32_t offset_;
+  int32_t offset_;
 
-  uint32_t count_;
+  int32_t count_;
+
+  static Class* GetJavaLangString() {
+    DCHECK(java_lang_String_ != NULL);
+    return java_lang_String_;
+  }
+  static Class* GetCharArrayClass() {
+    DCHECK(char_array_ != NULL);
+    return char_array_;
+  }
 
   static Class* java_lang_String_;
   static Class* char_array_;
diff --git a/src/object_test.cc b/src/object_test.cc
index 18e8e77..2d32ad0 100644
--- a/src/object_test.cc
+++ b/src/object_test.cc
@@ -16,12 +16,12 @@
 
 class ObjectTest : public RuntimeTest {
  protected:
-  void AssertString(size_t length,
+  void AssertString(int32_t length,
                     const char* utf8_in,
                     const char* utf16_expected_le,
-                    uint32_t hash_expected) {
+                    int32_t hash_expected) {
     uint16_t utf16_expected[length];
-    for (size_t i = 0; i < length; i++) {
+    for (int32_t i = 0; i < length; i++) {
       uint16_t ch = (((utf16_expected_le[i*2 + 0] & 0xff) << 8) |
                      ((utf16_expected_le[i*2 + 1] & 0xff) << 0));
       utf16_expected[i] = ch;
@@ -32,8 +32,8 @@
     ASSERT_TRUE(string->GetCharArray() != NULL);
     ASSERT_TRUE(string->GetCharArray()->GetChars() != NULL);
     // strlen is necessary because the 1-character string "\0" is interpreted as ""
-    ASSERT_TRUE(string->Equals(utf8_in) || length != strlen(utf8_in));
-    for (size_t i = 0; i < length; i++) {
+    ASSERT_TRUE(string->Equals(utf8_in) || length != static_cast<int32_t>(strlen(utf8_in)));
+    for (int32_t i = 0; i < length; i++) {
       EXPECT_EQ(utf16_expected[i], string->GetCharArray()->GetChar(i));
     }
     EXPECT_EQ(hash_expected, string->GetHashCode());
diff --git a/src/unordered_map.h b/src/unordered_map.h
new file mode 100644
index 0000000..66613c6
--- /dev/null
+++ b/src/unordered_map.h
@@ -0,0 +1,37 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#ifndef ART_SRC_CLASS_UNORDERED_MAP_H_
+#define ART_SRC_CLASS_UNORDERED_MAP_H_
+
+#include "stringpiece.h"
+
+#ifdef __ANDROID__
+#include <unordered_map>
+#else
+#include <tr1/unordered_map>
+#endif
+
+namespace std {
+#ifndef __ANDROID__
+namespace tr1 {
+#endif
+template<>
+struct hash<art::StringPiece> {
+ public:
+  size_t operator()(const art::StringPiece& string_piece) const {
+    size_t string_size = string_piece.size();
+    const char* string_data = string_piece.data();
+    // this is the java.lang.String hashcode for convenience, not interoperability
+    size_t hash = 0;
+    while (string_size--) {
+      hash = hash * 31 + *string_data++;
+    }
+    return hash;
+  }
+};
+#ifndef __ANDROID__
+}  // namespace tr1
+#endif
+}  // namespace std
+
+#endif  // ART_SRC_CLASS_UNORDERED_MAP_H_
diff --git a/src/zip_archive.cc b/src/zip_archive.cc
index 09dc3d4..a615321 100644
--- a/src/zip_archive.cc
+++ b/src/zip_archive.cc
@@ -310,7 +310,7 @@
 
 ZipEntry* ZipArchive::Find(const char* name) {
   DCHECK(name != NULL);
-  std::map<StringPiece, const uint8_t*>::const_iterator it = dir_entries_.find(name);
+  DirEntries::const_iterator it = dir_entries_.find(name);
   if (it == dir_entries_.end()) {
     return NULL;
   }
diff --git a/src/zip_archive.h b/src/zip_archive.h
index 9eb2948..858afe09 100644
--- a/src/zip_archive.h
+++ b/src/zip_archive.h
@@ -26,6 +26,7 @@
 #include "logging.h"
 #include "scoped_ptr.h"
 #include "stringpiece.h"
+#include "unordered_map.h"
 
 namespace art {
 
@@ -183,8 +184,8 @@
   uint16_t num_entries_;
   off_t dir_offset_;
   scoped_ptr<MemMap> dir_map_;
-  // TODO: unordered map
-  std::map<StringPiece, const uint8_t*> dir_entries_;
+  typedef std::tr1::unordered_map<StringPiece, const uint8_t*> DirEntries;
+  DirEntries dir_entries_;
 
   friend class ZipEntry;
 };