Merge "Let the map function compute the relative pc." into mnc-dev
diff --git a/Android.mk b/Android.mk
index 9360355..e7623c6 100644
--- a/Android.mk
+++ b/Android.mk
@@ -55,16 +55,16 @@
 clean-oat-target:
 	adb root
 	adb wait-for-device remount
-	adb shell rm -rf $(ART_TARGET_NATIVETEST_DIR)
-	adb shell rm -rf $(ART_TARGET_TEST_DIR)
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*/*
-	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)
-	adb shell rm -rf system/app/$(DEX2OAT_TARGET_ARCH)
+	adb shell sh -c "rm -rf $(ART_TARGET_NATIVETEST_DIR)"
+	adb shell sh -c "rm -rf $(ART_TARGET_TEST_DIR)"
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*/*"
+	adb shell sh -c "rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$(DEX2OAT_TARGET_ARCH)"
+	adb shell sh -c "rm -rf system/app/$(DEX2OAT_TARGET_ARCH)"
 ifdef TARGET_2ND_ARCH
-	adb shell rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
-	adb shell rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)
+	adb shell sh -c "rm -rf $(DEXPREOPT_BOOT_JAR_DIR)/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)"
+	adb shell sh -c "rm -rf system/app/$($(TARGET_2ND_ARCH_VAR_PREFIX)DEX2OAT_TARGET_ARCH)"
 endif
-	adb shell rm -rf data/run-test/test-*/dalvik-cache/*
+	adb shell sh -c "rm -rf data/run-test/test-*/dalvik-cache/*"
 
 ifneq ($(art_dont_bother),true)
 
@@ -404,7 +404,7 @@
 use-art-full:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter ""
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
@@ -414,19 +414,19 @@
 use-artd-full:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter ""
 	adb shell setprop dalvik.vm.image-dex2oat-filter ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
 	adb shell start
 
-.PHONY: use-art-smart
-use-art-smart:
+.PHONY: use-art-verify-at-runtime
+use-art-verify-at-runtime:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
-	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
-	adb shell setprop dalvik.vm.image-dex2oat-filter ""
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
+	adb shell setprop dalvik.vm.dex2oat-filter "verify-at-runtime"
+	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-at-runtime"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
 	adb shell start
 
@@ -434,7 +434,7 @@
 use-art-interpret-only:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
@@ -444,7 +444,7 @@
 use-artd-interpret-only:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter "interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
@@ -454,7 +454,7 @@
 use-art-verify-none:
 	adb root
 	adb wait-for-device shell stop
-	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell sh -c "rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*"
 	adb shell setprop dalvik.vm.dex2oat-filter "verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-filter "verify-none"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index c60e75b..5beb959 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -189,6 +189,7 @@
 
 ART_C_INCLUDES := \
   external/gtest/include \
+  external/icu/icu4c/source/common \
   external/valgrind/main/include \
   external/valgrind/main \
   external/vixl/src \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index eb87c29..02d5327 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -26,6 +26,30 @@
 namespace art {
 
 /**
+ * @class StringChange
+ * @brief Converts String.<init> calls into StringFactory calls.
+ */
+class StringChange : public PassME {
+ public:
+  StringChange() : PassME("StringChange", kNoNodes) {
+  }
+
+  void Start(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph->StringChange();
+  }
+
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->HasInvokes();
+  }
+};
+
+/**
  * @class CacheFieldLoweringInfo
  * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns.
  */
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 7bfbb34..7385a8b 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -519,6 +519,7 @@
   bool is_range;
   DexOffset offset;       // Offset in code units.
   MIR* mir;
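+  // For a String.<init> call rewritten to a StringFactory call, the Thread entrypoint
+  // offset of the target method; 0 otherwise.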
+  int32_t string_init_offset;
 };
 
 
@@ -723,6 +724,8 @@
   void BasicBlockOptimization();
   void BasicBlockOptimizationEnd();
 
+  void StringChange();
+
   const ArenaVector<BasicBlockId>& GetTopologicalSortOrder() {
     DCHECK(!topological_order_.empty());
     return topological_order_;
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 0c84b82..5654604 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -16,6 +16,7 @@
 
 # include "mir_method_info.h"
 
+#include "dex/compiler_ir.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/verified_method.h"
@@ -83,6 +84,13 @@
     MethodReference* devirt_target = (it->target_dex_file_ != nullptr) ? &devirt_ref : nullptr;
     InvokeType invoke_type = it->GetInvokeType();
     mirror::ArtMethod* resolved_method = nullptr;
+
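+    // Detect String.<init>: it is resolved as a direct call here and has its direct code
+    // cleared below.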
+    bool string_init = false;
+    if (default_inliner->IsStringInitMethodIndex(it->MethodIndex())) {
+      string_init = true;
+      invoke_type = kDirect;
+    }
+
     if (!it->IsQuickened()) {
       it->target_dex_file_ = dex_file;
       it->target_method_idx_ = it->MethodIndex();
@@ -170,6 +178,9 @@
     it->target_dex_file_ = target_method.dex_file;
     it->target_method_idx_ = target_method.dex_method_index;
     it->stats_flags_ = fast_path_flags;
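+    // String.<init> targets are loaded via the Thread's StringFactory entrypoints, so do
+    // not use direct code.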
+    if (string_init) {
+      it->direct_code_ = 0;
+    }
   }
 }
 
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index f7107c1..217dbee 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -18,6 +18,7 @@
 #include "base/logging.h"
 #include "base/scoped_arena_containers.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/verified_method.h"
 #include "dex_flags.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
@@ -25,10 +26,11 @@
 #include "gvn_dead_code_elimination.h"
 #include "local_value_numbering.h"
 #include "mir_field_info.h"
-#include "type_inference.h"
+#include "mirror/string.h"
 #include "quick/dex_file_method_inliner.h"
 #include "quick/dex_file_to_method_inliner_map.h"
 #include "stack.h"
+#include "type_inference.h"
 
 namespace art {
 
@@ -1660,6 +1662,77 @@
   temp_scoped_alloc_.reset();
 }
 
+void MIRGraph::StringChange() {
+  AllNodesIterator iter(this);
+  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+      // Look for NEW_INSTANCE of String and String.<init> invokes; skip other instructions.
+      Instruction::Code opcode = mir->dalvikInsn.opcode;
+      if (opcode == Instruction::NEW_INSTANCE) {
+        uint32_t type_idx = mir->dalvikInsn.vB;
+        if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) {
+          // Change NEW_INSTANCE and throwing half of the insn (if it exists) into CONST_4 of 0
+          mir->dalvikInsn.opcode = Instruction::CONST_4;
+          mir->dalvikInsn.vB = 0;
+          MIR* check_mir = GetBasicBlock(bb->predecessors[0])->last_mir_insn;
+          if (check_mir != nullptr &&
+              static_cast<int>(check_mir->dalvikInsn.opcode) == kMirOpCheck) {
+            check_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+            check_mir->dalvikInsn.vB = 0;
+          }
+        }
+      } else if ((opcode == Instruction::INVOKE_DIRECT) ||
+                 (opcode == Instruction::INVOKE_DIRECT_RANGE)) {
+        uint32_t method_idx = mir->dalvikInsn.vB;
+        DexFileMethodInliner* inliner =
+            cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
+        if (inliner->IsStringInitMethodIndex(method_idx)) {
+          bool is_range = (opcode == Instruction::INVOKE_DIRECT_RANGE);
+          uint32_t orig_this_reg = is_range ? mir->dalvikInsn.vC : mir->dalvikInsn.arg[0];
+          // Remove this pointer from string init and change to static call.
+          mir->dalvikInsn.vA--;
+          if (!is_range) {
+            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC;
+            for (uint32_t i = 0; i < mir->dalvikInsn.vA; i++) {
+              mir->dalvikInsn.arg[i] = mir->dalvikInsn.arg[i + 1];
+            }
+          } else {
+            mir->dalvikInsn.opcode = Instruction::INVOKE_STATIC_RANGE;
+            mir->dalvikInsn.vC++;
+          }
+          // Insert a move-result instruction to the original this pointer reg.
+          MIR* move_result_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+          move_result_mir->dalvikInsn.opcode = Instruction::MOVE_RESULT_OBJECT;
+          move_result_mir->dalvikInsn.vA = orig_this_reg;
+          move_result_mir->offset = mir->offset;
+          move_result_mir->m_unit_index = mir->m_unit_index;
+          bb->InsertMIRAfter(mir, move_result_mir);
+          // Add additional moves if this pointer was copied to other registers.
+          const VerifiedMethod* verified_method =
+              cu_->compiler_driver->GetVerifiedMethod(cu_->dex_file, cu_->method_idx);
+          DCHECK(verified_method != nullptr);
+          const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
+              verified_method->GetStringInitPcRegMap();
+          auto map_it = string_init_map.find(mir->offset);
+          if (map_it != string_init_map.end()) {
+            const std::set<uint32_t>& reg_set = map_it->second;
+            for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+              MIR* move_mir = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR));
+              move_mir->dalvikInsn.opcode = Instruction::MOVE_OBJECT;
+              move_mir->dalvikInsn.vA = *set_it;
+              move_mir->dalvikInsn.vB = orig_this_reg;
+              move_mir->offset = mir->offset;
+              move_mir->m_unit_index = mir->m_unit_index;
+              bb->InsertMIRAfter(move_result_mir, move_mir);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
 bool MIRGraph::EliminateSuspendChecksGate() {
   if ((cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
       GetMaxNestedLoops() == 0u ||   // Nothing to do.
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 3e193b4..375003b 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -35,6 +35,7 @@
    * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
    *   - This is not yet an issue: no current pass would require it.
    */
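+  // Rewrite String.<init> invokes before the method lowering info is cached.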
+  pass_manager->AddPass(new StringChange);
   pass_manager->AddPass(new CacheFieldLoweringInfo);
   pass_manager->AddPass(new CacheMethodLoweringInfo);
   pass_manager->AddPass(new CalculatePredecessors);
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 6ba4016..2b2d6af 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -21,6 +21,7 @@
 #include "arm_lir.h"
 #include "base/logging.h"
 #include "dex/mir_graph.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -619,13 +620,31 @@
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
  */
-int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
+int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                                   int state, const MethodReference& target_method,
                                   uint32_t unused_idx ATTRIBUTE_UNUSED,
                                   uintptr_t direct_code, uintptr_t direct_method,
                                   InvokeType type) {
   ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get());
-  if (direct_code != 0 && direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->LoadRefDisp(rs_rARM_SELF, info->string_init_offset, arg0_ref, kNotVolatile);
+      break;
+    }
+    case 1:  // Grab the code from the method*
+      if (direct_code == 0) {
+        // kInvokeTgt := arg0_ref->entrypoint
+        cg->LoadWordDisp(arg0_ref,
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                             kArmPointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 9a7c2ad..e49e40d 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -21,6 +21,7 @@
 #include "arm64_lir.h"
 #include "base/logging.h"
 #include "dex/mir_graph.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -460,7 +461,25 @@
                                       InvokeType type) {
   UNUSED(info, unused_idx);
   Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
-  if (direct_code != 0 && direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->LoadRefDisp(rs_xSELF, info->string_init_offset, arg0_ref, kNotVolatile);
+      break;
+    }
+    case 1:  // Grab the code from the method*
+      if (direct_code == 0) {
+        // kInvokeTgt := arg0_ref->entrypoint
+        cg->LoadWordDisp(arg0_ref,
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index f5e6c09..2568ee3 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -55,8 +55,12 @@
     false,  // kIntrinsicReferenceGetReferent
     false,  // kIntrinsicCharAt
     false,  // kIntrinsicCompareTo
+    false,  // kIntrinsicGetCharsNoCheck
     false,  // kIntrinsicIsEmptyOrLength
     false,  // kIntrinsicIndexOf
+    true,   // kIntrinsicNewStringFromBytes
+    true,   // kIntrinsicNewStringFromChars
+    true,   // kIntrinsicNewStringFromString
     true,   // kIntrinsicCurrentThread
     true,   // kIntrinsicPeek
     true,   // kIntrinsicPoke
@@ -88,8 +92,15 @@
 static_assert(!kIntrinsicIsStatic[kIntrinsicReferenceGetReferent], "Get must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCharAt], "CharAt must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCompareTo], "CompareTo must not be static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicGetCharsNoCheck], "GetCharsNoCheck must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], "IsEmptyOrLength must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicIndexOf], "IndexOf must not be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNewStringFromBytes],
+              "NewStringFromBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNewStringFromChars],
+              "NewStringFromChars must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNewStringFromString],
+              "NewStringFromString must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCurrentThread], "CurrentThread must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
@@ -137,9 +148,15 @@
     "F",                       // kClassCacheFloat
     "D",                       // kClassCacheDouble
     "V",                       // kClassCacheVoid
+    "[B",                      // kClassCacheJavaLangByteArray
+    "[C",                      // kClassCacheJavaLangCharArray
+    "[I",                      // kClassCacheJavaLangIntArray
     "Ljava/lang/Object;",      // kClassCacheJavaLangObject
-    "Ljava/lang/ref/Reference;",  // kClassCacheJavaLangRefReference
+    "Ljava/lang/ref/Reference;",   // kClassCacheJavaLangRefReference
     "Ljava/lang/String;",      // kClassCacheJavaLangString
+    "Ljava/lang/StringBuffer;",    // kClassCacheJavaLangStringBuffer
+    "Ljava/lang/StringBuilder;",   // kClassCacheJavaLangStringBuilder
+    "Ljava/lang/StringFactory;",   // kClassCacheJavaLangStringFactory
     "Ljava/lang/Double;",      // kClassCacheJavaLangDouble
     "Ljava/lang/Float;",       // kClassCacheJavaLangFloat
     "Ljava/lang/Integer;",     // kClassCacheJavaLangInteger
@@ -148,10 +165,10 @@
     "Ljava/lang/Math;",        // kClassCacheJavaLangMath
     "Ljava/lang/StrictMath;",  // kClassCacheJavaLangStrictMath
     "Ljava/lang/Thread;",      // kClassCacheJavaLangThread
+    "Ljava/nio/charset/Charset;",  // kClassCacheJavaNioCharsetCharset
     "Llibcore/io/Memory;",     // kClassCacheLibcoreIoMemory
     "Lsun/misc/Unsafe;",       // kClassCacheSunMiscUnsafe
     "Ljava/lang/System;",      // kClassCacheJavaLangSystem
-    "[C"                       // kClassCacheJavaLangCharArray
 };
 
 const char* const DexFileMethodInliner::kNameCacheNames[] = {
@@ -172,9 +189,14 @@
     "getReferent",           // kNameCacheReferenceGet
     "charAt",                // kNameCacheCharAt
     "compareTo",             // kNameCacheCompareTo
+    "getCharsNoCheck",       // kNameCacheGetCharsNoCheck
     "isEmpty",               // kNameCacheIsEmpty
     "indexOf",               // kNameCacheIndexOf
     "length",                // kNameCacheLength
+    "<init>",                // kNameCacheInit
+    "newStringFromBytes",    // kNameCacheNewStringFromBytes
+    "newStringFromChars",    // kNameCacheNewStringFromChars
+    "newStringFromString",   // kNameCacheNewStringFromString
     "currentThread",         // kNameCacheCurrentThread
     "peekByte",              // kNameCachePeekByte
     "peekIntNative",         // kNameCachePeekIntNative
@@ -282,7 +304,53 @@
         kClassCacheJavaLangObject } },
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
-                kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt}}
+        kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
+    // kProtoCacheIICharArrayI_V
+    { kClassCacheVoid, 4, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray,
+        kClassCacheInt } },
+    // kProtoCacheByteArrayIII_String
+    { kClassCacheJavaLangString, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheInt } },
+    // kProtoCacheIICharArray_String
+    { kClassCacheJavaLangString, 3, { kClassCacheInt, kClassCacheInt,
+        kClassCacheJavaLangCharArray } },
+    // kProtoCacheString_String
+    { kClassCacheJavaLangString, 1, { kClassCacheJavaLangString } },
+    // kProtoCache_V
+    { kClassCacheVoid, 0, { } },
+    // kProtoCacheByteArray_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangByteArray } },
+    // kProtoCacheByteArrayI_V
+    { kClassCacheVoid, 2, { kClassCacheJavaLangByteArray, kClassCacheInt } },
+    // kProtoCacheByteArrayII_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheByteArrayIII_V
+    { kClassCacheVoid, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheInt } },
+    // kProtoCacheByteArrayIIString_V
+    { kClassCacheVoid, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheJavaLangString } },
+    // kProtoCacheByteArrayString_V
+    { kClassCacheVoid, 2, { kClassCacheJavaLangByteArray, kClassCacheJavaLangString } },
+    // kProtoCacheByteArrayIICharset_V
+    { kClassCacheVoid, 4, { kClassCacheJavaLangByteArray, kClassCacheInt, kClassCacheInt,
+        kClassCacheJavaNioCharsetCharset } },
+    // kProtoCacheByteArrayCharset_V
+    { kClassCacheVoid, 2, { kClassCacheJavaLangByteArray, kClassCacheJavaNioCharsetCharset } },
+    // kProtoCacheCharArray_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangCharArray } },
+    // kProtoCacheCharArrayII_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheIICharArray_V
+    { kClassCacheVoid, 3, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray } },
+    // kProtoCacheIntArrayII_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangIntArray, kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheString_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangString } },
+    // kProtoCacheStringBuffer_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangStringBuffer } },
+    // kProtoCacheStringBuilder_V
+    { kClassCacheVoid, 1, { kClassCacheJavaLangStringBuilder } },
 };
 
 const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods[] = {
@@ -343,6 +411,7 @@
 
     INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
     INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, GetCharsNoCheck, IICharArrayI_V, kIntrinsicGetCharsNoCheck, 0),
     INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
     INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
     INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
@@ -386,8 +455,29 @@
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
 
-
 #undef INTRINSIC
+
+#define SPECIAL(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineSpecial, { d } } }
+
+    SPECIAL(JavaLangString, Init, _V, kInlineStringInit, 0),
+    SPECIAL(JavaLangString, Init, ByteArray_V, kInlineStringInit, 1),
+    SPECIAL(JavaLangString, Init, ByteArrayI_V, kInlineStringInit, 2),
+    SPECIAL(JavaLangString, Init, ByteArrayII_V, kInlineStringInit, 3),
+    SPECIAL(JavaLangString, Init, ByteArrayIII_V, kInlineStringInit, 4),
+    SPECIAL(JavaLangString, Init, ByteArrayIIString_V, kInlineStringInit, 5),
+    SPECIAL(JavaLangString, Init, ByteArrayString_V, kInlineStringInit, 6),
+    SPECIAL(JavaLangString, Init, ByteArrayIICharset_V, kInlineStringInit, 7),
+    SPECIAL(JavaLangString, Init, ByteArrayCharset_V, kInlineStringInit, 8),
+    SPECIAL(JavaLangString, Init, CharArray_V, kInlineStringInit, 9),
+    SPECIAL(JavaLangString, Init, CharArrayII_V, kInlineStringInit, 10),
+    SPECIAL(JavaLangString, Init, IICharArray_V, kInlineStringInit, 11),
+    SPECIAL(JavaLangString, Init, IntArrayII_V, kInlineStringInit, 12),
+    SPECIAL(JavaLangString, Init, String_V, kInlineStringInit, 13),
+    SPECIAL(JavaLangString, Init, StringBuffer_V, kInlineStringInit, 14),
+    SPECIAL(JavaLangString, Init, StringBuilder_V, kInlineStringInit, 15),
+
+#undef SPECIAL
 };
 
 DexFileMethodInliner::DexFileMethodInliner()
@@ -491,11 +581,19 @@
       return backend->GenInlinedCharAt(info);
     case kIntrinsicCompareTo:
       return backend->GenInlinedStringCompareTo(info);
+    case kIntrinsicGetCharsNoCheck:
+      return backend->GenInlinedStringGetCharsNoCheck(info);
     case kIntrinsicIsEmptyOrLength:
       return backend->GenInlinedStringIsEmptyOrLength(
           info, intrinsic.d.data & kIntrinsicFlagIsEmpty);
     case kIntrinsicIndexOf:
       return backend->GenInlinedIndexOf(info, intrinsic.d.data & kIntrinsicFlagBase0);
+    case kIntrinsicNewStringFromBytes:
+      return backend->GenInlinedStringFactoryNewStringFromBytes(info);
+    case kIntrinsicNewStringFromChars:
+      return backend->GenInlinedStringFactoryNewStringFromChars(info);
+    case kIntrinsicNewStringFromString:
+      return backend->GenInlinedStringFactoryNewStringFromString(info);
     case kIntrinsicCurrentThread:
       return backend->GenInlinedCurrentThread(info);
     case kIntrinsicPeek:
@@ -574,6 +672,8 @@
       move_result = mir_graph->FindMoveResult(bb, invoke);
       result = GenInlineIPut(mir_graph, bb, invoke, move_result, method);
       break;
+    case kInlineStringInit:
+      return false;
     default:
       LOG(FATAL) << "Unexpected inline op: " << method.opcode;
       break;
@@ -921,4 +1021,21 @@
   return true;
 }
 
+uint32_t DexFileMethodInliner::GetOffsetForStringInit(uint32_t method_index, size_t pointer_size) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  auto it = inline_methods_.find(method_index);
+  if (it != inline_methods_.end() && (it->second.opcode == kInlineStringInit)) {
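+    // The StringFactory entrypoints are assumed to be laid out consecutively starting at
+    // pNewEmptyString; d.data is the index from the SPECIAL() table above.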
+    uint32_t string_init_base_offset = Thread::QuickEntryPointOffsetWithSize(
+              OFFSETOF_MEMBER(QuickEntryPoints, pNewEmptyString), pointer_size);
+    return string_init_base_offset + it->second.d.data * pointer_size;
+  }
+  return 0;
+}
+
+bool DexFileMethodInliner::IsStringInitMethodIndex(uint32_t method_index) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  auto it = inline_methods_.find(method_index);
+  return (it != inline_methods_.end()) && (it->second.opcode == kInlineStringInit);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index d1e5621..26b41bf 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -96,6 +96,17 @@
         LOCKS_EXCLUDED(lock_);
 
     /**
+     * Gets the thread pointer entrypoint offset for a string init method index and pointer size.
+     */
+    uint32_t GetOffsetForStringInit(uint32_t method_index, size_t pointer_size)
+        LOCKS_EXCLUDED(lock_);
+
+    /**
+     * Check whether a particular method index is a string init.
+     */
+    bool IsStringInitMethodIndex(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+
+    /**
      * To avoid multiple lookups of a class by its descriptor, we cache its
      * type index in the IndexCache. These are the indexes into the IndexCache
      * class_indexes array.
@@ -111,9 +122,15 @@
       kClassCacheFloat,
       kClassCacheDouble,
       kClassCacheVoid,
+      kClassCacheJavaLangByteArray,
+      kClassCacheJavaLangCharArray,
+      kClassCacheJavaLangIntArray,
       kClassCacheJavaLangObject,
       kClassCacheJavaLangRefReference,
       kClassCacheJavaLangString,
+      kClassCacheJavaLangStringBuffer,
+      kClassCacheJavaLangStringBuilder,
+      kClassCacheJavaLangStringFactory,
       kClassCacheJavaLangDouble,
       kClassCacheJavaLangFloat,
       kClassCacheJavaLangInteger,
@@ -122,10 +139,10 @@
       kClassCacheJavaLangMath,
       kClassCacheJavaLangStrictMath,
       kClassCacheJavaLangThread,
+      kClassCacheJavaNioCharsetCharset,
       kClassCacheLibcoreIoMemory,
       kClassCacheSunMiscUnsafe,
       kClassCacheJavaLangSystem,
-      kClassCacheJavaLangCharArray,
       kClassCacheLast
     };
 
@@ -153,9 +170,14 @@
       kNameCacheReferenceGetReferent,
       kNameCacheCharAt,
       kNameCacheCompareTo,
+      kNameCacheGetCharsNoCheck,
       kNameCacheIsEmpty,
       kNameCacheIndexOf,
       kNameCacheLength,
+      kNameCacheInit,
+      kNameCacheNewStringFromBytes,
+      kNameCacheNewStringFromChars,
+      kNameCacheNewStringFromString,
       kNameCacheCurrentThread,
       kNameCachePeekByte,
       kNameCachePeekIntNative,
@@ -230,6 +252,26 @@
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
       kProtoCacheCharArrayICharArrayII_V,
+      kProtoCacheIICharArrayI_V,
+      kProtoCacheByteArrayIII_String,
+      kProtoCacheIICharArray_String,
+      kProtoCacheString_String,
+      kProtoCache_V,
+      kProtoCacheByteArray_V,
+      kProtoCacheByteArrayI_V,
+      kProtoCacheByteArrayII_V,
+      kProtoCacheByteArrayIII_V,
+      kProtoCacheByteArrayIIString_V,
+      kProtoCacheByteArrayString_V,
+      kProtoCacheByteArrayIICharset_V,
+      kProtoCacheByteArrayCharset_V,
+      kProtoCacheCharArray_V,
+      kProtoCacheCharArrayII_V,
+      kProtoCacheIICharArray_V,
+      kProtoCacheIntArrayII_V,
+      kProtoCacheString_V,
+      kProtoCacheStringBuffer_V,
+      kProtoCacheStringBuilder_V,
       kProtoCacheLast
     };
 
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 1eb3a5f..ab011fc 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -375,6 +375,18 @@
   CallHelper(r_tgt, trampoline, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
+    QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1, RegLocation arg2,
+    RegLocation arg3, bool safepoint_pc) {
+  RegStorage r_tgt = CallHelperSetup(trampoline);
+  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
+  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
+  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
+  LoadValueDirectFixed(arg3, TargetReg(kArg3, arg3));
+  ClobberCallerSave();
+  CallHelper(r_tgt, trampoline, safepoint_pc);
+}
+
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform initial
@@ -966,14 +978,10 @@
 }
 
 bool Mir2Lir::GenInlinedCharAt(CallInfo* info) {
-  // Location of reference to data array
+  // Location of char array data
   int value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   int count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  int offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
@@ -983,38 +991,21 @@
   GenNullCheck(rl_obj.reg, info->opt_flags);
   bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
   LIR* range_check_branch = nullptr;
-  RegStorage reg_off;
-  RegStorage reg_ptr;
-  reg_off = AllocTemp();
-  reg_ptr = AllocTempRef();
   if (range_check) {
     reg_max = AllocTemp();
     Load32Disp(rl_obj.reg, count_offset, reg_max);
     MarkPossibleNullPointerException(info->opt_flags);
-  }
-  Load32Disp(rl_obj.reg, offset_offset, reg_off);
-  MarkPossibleNullPointerException(info->opt_flags);
-  LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile);
-  if (range_check) {
-    // Set up a slow path to allow retry in case of bounds violation */
+    // Set up a slow path to allow retry in case of bounds violation
     OpRegReg(kOpCmp, rl_idx.reg, reg_max);
     FreeTemp(reg_max);
     range_check_branch = OpCondBranch(kCondUge, nullptr);
   }
-  OpRegImm(kOpAdd, reg_ptr, data_offset);
-  if (rl_idx.is_const) {
-    OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
-  } else {
-    OpRegReg(kOpAdd, reg_off, rl_idx.reg);
-  }
+  RegStorage reg_ptr = AllocTempRef();
+  OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, value_offset);
   FreeTemp(rl_obj.reg);
-  if (rl_idx.location == kLocPhysReg) {
-    FreeTemp(rl_idx.reg);
-  }
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexed(reg_ptr, reg_off, rl_result.reg, 1, kUnsignedHalf);
-  FreeTemp(reg_off);
+  LoadBaseIndexed(reg_ptr, rl_idx.reg, rl_result.reg, 1, kUnsignedHalf);
   FreeTemp(reg_ptr);
   StoreValue(rl_dest, rl_result);
   if (range_check) {
@@ -1025,6 +1016,59 @@
   return true;
 }
 
+bool Mir2Lir::GenInlinedStringGetCharsNoCheck(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  // Location of data in char array buffer
+  int data_offset = mirror::Array::DataOffset(char_component_size).Int32Value();
+  // Location of char array data in string
+  int value_offset = mirror::String::ValueOffset().Int32Value();
+
+  RegLocation rl_obj = info->args[0];
+  RegLocation rl_start = info->args[1];
+  RegLocation rl_end = info->args[2];
+  RegLocation rl_buffer = info->args[3];
+  RegLocation rl_index = info->args[4];
+
+  ClobberCallerSave();
+  LockCallTemps();  // Using fixed registers
+  RegStorage reg_dst_ptr = TargetReg(kArg0, kRef);
+  RegStorage reg_src_ptr = TargetReg(kArg1, kRef);
+  RegStorage reg_length = TargetReg(kArg2, kNotWide);
+  RegStorage reg_tmp = TargetReg(kArg3, kNotWide);
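+  // 64-bit view of reg_tmp, used for the pointer adds on arm64 below.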
+  RegStorage reg_tmp_ptr = RegStorage(RegStorage::k64BitSolo, reg_tmp.GetRawBits() & RegStorage::kRegTypeMask);
+
+  LoadValueDirectFixed(rl_buffer, reg_dst_ptr);
+  OpRegImm(kOpAdd, reg_dst_ptr, data_offset);
+  LoadValueDirectFixed(rl_index, reg_tmp);
+  OpRegRegImm(kOpLsl, reg_tmp, reg_tmp, 1);
+  OpRegReg(kOpAdd, reg_dst_ptr, cu_->instruction_set == kArm64 ? reg_tmp_ptr : reg_tmp);
+
+  LoadValueDirectFixed(rl_start, reg_tmp);
+  LoadValueDirectFixed(rl_end, reg_length);
+  OpRegReg(kOpSub, reg_length, reg_tmp);
+  OpRegRegImm(kOpLsl, reg_length, reg_length, 1);
+  LoadValueDirectFixed(rl_obj, reg_src_ptr);
+
+  OpRegImm(kOpAdd, reg_src_ptr, value_offset);
+  OpRegRegImm(kOpLsl, reg_tmp, reg_tmp, 1);
+  OpRegReg(kOpAdd, reg_src_ptr, cu_->instruction_set == kArm64 ? reg_tmp_ptr : reg_tmp);
+
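+  // kArg0 = destination, kArg1 = source, kArg2 = byte count; call memcpy directly.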
+  RegStorage r_tgt;
+  if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
+    r_tgt = LoadHelper(kQuickMemcpy);
+  } else {
+    r_tgt = RegStorage::InvalidReg();
+  }
+  // NOTE: not a safepoint
+  CallHelper(r_tgt, kQuickMemcpy, false, true);
+
+  return true;
+}
+
 // Generates an inlined String.is_empty or String.length.
 bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) {
   if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
@@ -1058,6 +1102,58 @@
   return true;
 }
 
+bool Mir2Lir::GenInlinedStringFactoryNewStringFromBytes(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_data = info->args[0];
+  RegLocation rl_high = info->args[1];
+  RegLocation rl_offset = info->args[2];
+  RegLocation rl_count = info->args[3];
+  rl_data = LoadValue(rl_data, kRefReg);
+  LIR* data_null_check_branch = OpCmpImmBranch(kCondEq, rl_data.reg, 0, nullptr);
+  AddIntrinsicSlowPath(info, data_null_check_branch);
+  CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
+      kQuickAllocStringFromBytes, rl_data, rl_high, rl_offset, rl_count, true);
+  RegLocation rl_return = GetReturn(kRefReg);
+  RegLocation rl_dest = InlineTarget(info);
+  StoreValue(rl_dest, rl_return);
+  return true;
+}
+
+bool Mir2Lir::GenInlinedStringFactoryNewStringFromChars(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_offset = info->args[0];
+  RegLocation rl_count = info->args[1];
+  RegLocation rl_data = info->args[2];
+  CallRuntimeHelperRegLocationRegLocationRegLocation(
+      kQuickAllocStringFromChars, rl_offset, rl_count, rl_data, true);
+  RegLocation rl_return = GetReturn(kRefReg);
+  RegLocation rl_dest = InlineTarget(info);
+  StoreValue(rl_dest, rl_return);
+  return true;
+}
+
+bool Mir2Lir::GenInlinedStringFactoryNewStringFromString(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_string = info->args[0];
+  rl_string = LoadValue(rl_string, kRefReg);
+  LIR* string_null_check_branch = OpCmpImmBranch(kCondEq, rl_string.reg, 0, nullptr);
+  AddIntrinsicSlowPath(info, string_null_check_branch);
+  CallRuntimeHelperRegLocation(kQuickAllocStringFromString, rl_string, true);
+  RegLocation rl_return = GetReturn(kRefReg);
+  RegLocation rl_dest = InlineTarget(info);
+  StoreValue(rl_dest, rl_return);
+  return true;
+}
+
 bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) {
   if (cu_->instruction_set == kMips || cu_->instruction_set == kMips64) {
     // TODO: add Mips and Mips64 implementations.
@@ -1451,9 +1547,22 @@
   LockCallTemps();
 
   const MirMethodLoweringInfo& method_info = mir_graph_->GetMethodLoweringInfo(info->mir);
+  MethodReference target_method = method_info.GetTargetMethod();
   cu_->compiler_driver->ProcessedInvoke(method_info.GetInvokeType(), method_info.StatsFlags());
   InvokeType original_type = static_cast<InvokeType>(method_info.GetInvokeType());
   info->type = method_info.GetSharpType();
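+  // String.<init> was rewritten to a StringFactory call: fetch its Thread entrypoint
+  // offset and treat the invoke as static.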
+  bool is_string_init = false;
+  if (method_info.IsSpecial()) {
+    DexFileMethodInliner* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(
+        target_method.dex_file);
+    if (inliner->IsStringInitMethodIndex(target_method.dex_method_index)) {
+      is_string_init = true;
+      size_t pointer_size = GetInstructionSetPointerSize(cu_->instruction_set);
+      info->string_init_offset = inliner->GetOffsetForStringInit(target_method.dex_method_index,
+                                                                 pointer_size);
+      info->type = kStatic;
+    }
+  }
   bool fast_path = method_info.FastPath();
   bool skip_this;
 
@@ -1478,7 +1587,6 @@
     next_call_insn = fast_path ? NextVCallInsn : NextVCallInsnSP;
     skip_this = fast_path;
   }
-  MethodReference target_method = method_info.GetTargetMethod();
   call_state = GenDalvikArgs(info, call_state, p_null_ck,
                              next_call_insn, target_method, method_info.VTableIndex(),
                              method_info.DirectCode(), method_info.DirectMethod(),
@@ -1495,7 +1603,7 @@
   FreeCallTemps();
   if (info->result.location != kLocInvalid) {
     // We have a following MOVE_RESULT - do it now.
-    RegisterClass reg_class =
+    RegisterClass reg_class = is_string_init ? kRefReg :
         ShortyToRegClass(mir_graph_->GetShortyFromMethodReference(info->method_ref)[0]);
     if (info->result.wide) {
       RegLocation ret_loc = GetReturnWide(reg_class);
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 39b9cc7..3d25384 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -20,7 +20,9 @@
 
 #include "base/logging.h"
 #include "dex/mir_graph.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "mips_lir.h"
@@ -397,11 +399,28 @@
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
  */
-static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, int state,
+static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, int state,
                           const MethodReference& target_method, uint32_t, uintptr_t direct_code,
                           uintptr_t direct_method, InvokeType type) {
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
-  if (direct_code != 0 && direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->LoadRefDisp(cg->TargetPtrReg(kSelf), info->string_init_offset, arg0_ref, kNotVolatile);
+      break;
+    }
+    case 1:  // Grab the code from the method*
+      if (direct_code == 0) {
+        int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+            InstructionSetPointerSize(cu->instruction_set)).Int32Value();
+        cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else if (direct_code != 0 && direct_method != 0) {
     switch (state) {
       case 0:  // Get the current Method* [sets kArg0]
         if (direct_code != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 6f227fc..4fdc728 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -897,6 +897,10 @@
                                                             RegLocation arg0, RegLocation arg1,
                                                             RegLocation arg2,
                                                             bool safepoint_pc);
+    void CallRuntimeHelperRegLocationRegLocationRegLocationRegLocation(
+        QuickEntrypointEnum trampoline, RegLocation arg0, RegLocation arg1,
+        RegLocation arg2, RegLocation arg3, bool safepoint_pc);
+
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
     virtual NextCallInsn GetNextSDCallInsn() = 0;
@@ -937,7 +941,11 @@
 
     bool GenInlinedReferenceGetReferent(CallInfo* info);
     virtual bool GenInlinedCharAt(CallInfo* info);
+    bool GenInlinedStringGetCharsNoCheck(CallInfo* info);
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
+    bool GenInlinedStringFactoryNewStringFromBytes(CallInfo* info);
+    bool GenInlinedStringFactoryNewStringFromChars(CallInfo* info);
+    bool GenInlinedStringFactoryNewStringFromString(CallInfo* info);
     virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     virtual bool GenInlinedAbsInt(CallInfo* info);
@@ -1459,26 +1467,6 @@
       return InexpensiveConstantInt(value);
     }
 
-    /**
-     * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
-     * @param divisor A constant divisor bits of float type.
-     * @return Returns true iff, x/divisor == x*(1.0f/divisor), for every float x.
-     */
-    bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
-      // True, if float value significand bits are 0.
-      return ((divisor & 0x7fffff) == 0);
-    }
-
-    /**
-     * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
-     * @param divisor A constant divisor bits of double type.
-     * @return Returns true iff, x/divisor == x*(1.0/divisor), for every double x.
-     */
-    bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
-      // True, if double value significand bits are 0.
-      return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
-    }
-
     // May be optimized by targets.
     virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index e2364d8..2495757 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -19,6 +19,7 @@
 #include "codegen_x86.h"
 
 #include "base/logging.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -343,11 +344,20 @@
 int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                                   int state, const MethodReference& target_method,
                                   uint32_t,
-                                  uintptr_t direct_code, uintptr_t direct_method,
+                                  uintptr_t direct_code ATTRIBUTE_UNUSED, uintptr_t direct_method,
                                   InvokeType type) {
-  UNUSED(info, direct_code);
   X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get());
-  if (direct_method != 0) {
+  if (info->string_init_offset != 0) {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0: {  // Grab target method* from thread pointer
+      cg->NewLIR2(kX86Mov32RT, arg0_ref.GetReg(), info->string_init_offset);
+      break;
+    }
+    default:
+      return -1;
+    }
+  } else if (direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_method != static_cast<uintptr_t>(-1)) {
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index b460379..2f211da 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -1302,10 +1302,6 @@
   int value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count within the String object.
   int count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array.
-  int offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_.
-  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   // Compute the number of words to search in to rCX.
   Load32Disp(rs_rDX, count_offset, rs_rCX);
@@ -1388,15 +1384,13 @@
 
   // Load the address of the string into EDI.
   // In case of start index we have to add the address to existing value in EDI.
-  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
   if (zero_based || (!zero_based && rl_start.is_const && start_value == 0)) {
-    Load32Disp(rs_rDX, offset_offset, rs_rDI);
+    OpRegRegImm(kOpAdd, rs_rDI, rs_rDX, value_offset);
   } else {
-    OpRegMem(kOpAdd, rs_rDI, rs_rDX, offset_offset);
+    OpRegImm(kOpLsl, rs_rDI, 1);
+    OpRegReg(kOpAdd, rs_rDI, rs_rDX);
+    OpRegImm(kOpAdd, rs_rDI, value_offset);
   }
-  OpRegImm(kOpLsl, rs_rDI, 1);
-  OpRegMem(kOpAdd, rs_rDI, rs_rDX, value_offset);
-  OpRegImm(kOpAdd, rs_rDI, data_offset);
 
   // EDI now contains the start of the string to be searched.
   // We are all prepared to do the search for the character.
@@ -2423,24 +2417,15 @@
   int value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   int count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  int offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
   rl_obj = LoadValue(rl_obj, kRefReg);
-  // X86 wants to avoid putting a constant index into a register.
-  if (!rl_idx.is_const) {
-    rl_idx = LoadValue(rl_idx, kCoreReg);
-  }
+  rl_idx = LoadValue(rl_idx, kCoreReg);
   RegStorage reg_max;
   GenNullCheck(rl_obj.reg, info->opt_flags);
   bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
   LIR* range_check_branch = nullptr;
-  RegStorage reg_off;
-  RegStorage reg_ptr;
   if (range_check) {
     // On x86, we can compare to memory directly
     // Set up a launch pad to allow retry in case of bounds violation */
@@ -2456,24 +2441,11 @@
       range_check_branch = OpCondBranch(kCondUge, nullptr);
     }
   }
-  reg_off = AllocTemp();
-  reg_ptr = AllocTempRef();
-  Load32Disp(rl_obj.reg, offset_offset, reg_off);
-  LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile);
-  if (rl_idx.is_const) {
-    OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
-  } else {
-    OpRegReg(kOpAdd, reg_off, rl_idx.reg);
-  }
-  FreeTemp(rl_obj.reg);
-  if (rl_idx.location == kLocPhysReg) {
-    FreeTemp(rl_idx.reg);
-  }
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf);
-  FreeTemp(reg_off);
-  FreeTemp(reg_ptr);
+  LoadBaseIndexedDisp(rl_obj.reg, rl_idx.reg, 1, value_offset, rl_result.reg, kUnsignedHalf);
+  FreeTemp(rl_idx.reg);
+  FreeTemp(rl_obj.reg);
   StoreValue(rl_dest, rl_result);
   if (range_check) {
     DCHECK(range_check_branch != nullptr);
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 7eba515..e788261 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -64,6 +64,9 @@
   if (method_verifier->HasCheckCasts()) {
     verified_method->GenerateSafeCastSet(method_verifier);
   }
+
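+  // Keep the verifier's record of which other registers alias the String.<init> receiver
+  // at each call site.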
+  verified_method->SetStringInitPcRegMap(method_verifier->GetStringInitPcRegMap());
+
   return verified_method.release();
 }
 
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index ad07639..242e3df 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -75,6 +75,13 @@
     return has_verification_failures_;
   }
 
+  void SetStringInitPcRegMap(SafeMap<uint32_t, std::set<uint32_t>>& string_init_pc_reg_map) {
+    string_init_pc_reg_map_ = string_init_pc_reg_map;
+  }
+  const SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() const {
+    return string_init_pc_reg_map_;
+  }
+
  private:
   VerifiedMethod() = default;
 
@@ -114,6 +121,10 @@
   SafeCastSet safe_cast_set_;
 
   bool has_verification_failures_;
+
+  // Copy of mapping generated by verifier of dex PCs of string init invocations
+  // to the set of other registers that the receiver has been copied into.
+  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
 };
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c858326..47288b5 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -40,6 +40,7 @@
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
 #include "elf_writer_quick.h"
 #include "jni_internal.h"
@@ -2485,4 +2486,16 @@
   return oss.str();
 }
 
+bool CompilerDriver::IsStringTypeIndex(uint16_t type_index, const DexFile* dex_file) {
+  const char* type = dex_file->GetTypeDescriptor(dex_file->GetTypeId(type_index));
+  return strcmp(type, "Ljava/lang/String;") == 0;
+}
+
+bool CompilerDriver::IsStringInit(uint32_t method_index, const DexFile* dex_file, int32_t* offset) {
+  DexFileMethodInliner* inliner = GetMethodInlinerMap()->GetMethodInliner(dex_file);
+  size_t pointer_size = InstructionSetPointerSize(GetInstructionSet());
+  *offset = inliner->GetOffsetForStringInit(method_index, pointer_size);
+  return inliner->IsStringInitMethodIndex(method_index);
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 02de11e..2b0985a 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -466,6 +466,9 @@
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
+  bool IsStringTypeIndex(uint16_t type_index, const DexFile* dex_file);
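+  // Returns whether method_index is a String.<init>; sets *offset to the matching
+  // StringFactory Thread entrypoint offset (0 if it is not).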
+  bool IsStringInit(uint32_t method_index, const DexFile* dex_file, int32_t* offset);
+
   void SetHadHardVerifierFailure() {
     had_hard_verifier_failure_ = true;
   }
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index 99b8e79..230ebe3 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -57,44 +57,41 @@
 
   // Pretty-print the generated DWARF data using objdump.
   template<typename ElfTypes>
-  std::vector<std::string> Objdump(bool is64bit, const char* args) {
+  std::vector<std::string> Objdump(const char* args) {
     // Write simple elf file with just the DWARF sections.
+    InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
     class NoCode : public CodeOutput {
-      virtual void SetCodeOffset(size_t) { }
-      virtual bool Write(OutputStream*) { return true; }
-    } code;
-    ScratchFile file;
-    InstructionSet isa = is64bit ? kX86_64 : kX86;
-    ElfBuilder<ElfTypes> builder(
-        &code, file.GetFile(), isa, 0, 0, 0, 0, 0, 0, false, false);
-    typedef ElfRawSectionBuilder<ElfTypes> Section;
-    Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+      bool Write(OutputStream*) OVERRIDE { return true; }  // NOLINT
+    } no_code;
+    ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0);
+    typedef typename ElfBuilder<ElfTypes>::RawSection RawSection;
+    RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0);
     if (!debug_info_data_.empty()) {
       debug_info.SetBuffer(debug_info_data_);
-      builder.RegisterRawSection(&debug_info);
+      builder.RegisterSection(&debug_info);
     }
     if (!debug_abbrev_data_.empty()) {
       debug_abbrev.SetBuffer(debug_abbrev_data_);
-      builder.RegisterRawSection(&debug_abbrev);
+      builder.RegisterSection(&debug_abbrev);
     }
     if (!debug_str_data_.empty()) {
       debug_str.SetBuffer(debug_str_data_);
-      builder.RegisterRawSection(&debug_str);
+      builder.RegisterSection(&debug_str);
     }
     if (!debug_line_data_.empty()) {
       debug_line.SetBuffer(debug_line_data_);
-      builder.RegisterRawSection(&debug_line);
+      builder.RegisterSection(&debug_line);
     }
     if (!eh_frame_data_.empty()) {
       eh_frame.SetBuffer(eh_frame_data_);
-      builder.RegisterRawSection(&eh_frame);
+      builder.RegisterSection(&eh_frame);
     }
-    builder.Init();
-    builder.Write();
+    ScratchFile file;
+    builder.Write(file.GetFile());
 
     // Read the elf file back using objdump.
     std::vector<std::string> lines;
@@ -123,9 +120,9 @@
 
   std::vector<std::string> Objdump(bool is64bit, const char* args) {
     if (is64bit) {
-      return Objdump<ElfTypes64>(is64bit, args);
+      return Objdump<ElfTypes64>(args);
     } else {
-      return Objdump<ElfTypes32>(is64bit, args);
+      return Objdump<ElfTypes32>(args);
     }
   }
 
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 32c8cce..07976e8 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,535 +17,32 @@
 #ifndef ART_COMPILER_ELF_BUILDER_H_
 #define ART_COMPILER_ELF_BUILDER_H_
 
+#include <vector>
+
 #include "arch/instruction_set.h"
-#include "base/stl_util.h"
-#include "base/value_object.h"
+#include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfSectionBuilder : public ValueObject {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-  using Elf_Shdr = typename ElfTypes::Shdr;
-
-  ElfSectionBuilder(const std::string& sec_name, Elf_Word type, Elf_Word flags,
-                    const ElfSectionBuilder<ElfTypes> *link, Elf_Word info,
-                    Elf_Word align, Elf_Word entsize)
-      : section_index_(0), name_(sec_name), link_(link) {
-    memset(&section_, 0, sizeof(section_));
-    section_.sh_type = type;
-    section_.sh_flags = flags;
-    section_.sh_info = info;
-    section_.sh_addralign = align;
-    section_.sh_entsize = entsize;
-  }
-  ElfSectionBuilder(const ElfSectionBuilder&) = default;
-
-  ~ElfSectionBuilder() {}
-
-  Elf_Word GetLink() const {
-    return (link_ != nullptr) ? link_->section_index_ : 0;
-  }
-
-  const Elf_Shdr* GetSection() const {
-    return &section_;
-  }
-
-  Elf_Shdr* GetSection() {
-    return &section_;
-  }
-
-  Elf_Word GetSectionIndex() const {
-    return section_index_;
-  }
-
-  void SetSectionIndex(Elf_Word section_index) {
-    section_index_ = section_index;
-  }
-
-  const std::string& GetName() const {
-    return name_;
-  }
-
- private:
-  Elf_Shdr section_;
-  Elf_Word section_index_;
-  const std::string name_;
-  const ElfSectionBuilder* const link_;
-};
-
-template <typename ElfTypes>
-class ElfDynamicBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-  using Elf_Sword = typename ElfTypes::Sword;
-  using Elf_Shdr = typename ElfTypes::Shdr;
-  using Elf_Dyn = typename ElfTypes::Dyn;
-
-  void AddDynamicTag(Elf_Sword tag, Elf_Word d_un) {
-    if (tag == DT_NULL) {
-      return;
-    }
-    dynamics_.push_back({nullptr, tag, d_un});
-  }
-
-  void AddDynamicTag(Elf_Sword tag, Elf_Word d_un,
-                     const ElfSectionBuilder<ElfTypes>* section) {
-    if (tag == DT_NULL) {
-      return;
-    }
-    dynamics_.push_back({section, tag, d_un});
-  }
-
-  ElfDynamicBuilder(const std::string& sec_name,
-                    ElfSectionBuilder<ElfTypes> *link)
-  : ElfSectionBuilder<ElfTypes>(sec_name, SHT_DYNAMIC, SHF_ALLOC | SHF_ALLOC,
-                                link, 0, kPageSize, sizeof(Elf_Dyn)) {}
-  ~ElfDynamicBuilder() {}
-
-  Elf_Word GetSize() const {
-    // Add 1 for the DT_NULL, 1 for DT_STRSZ, and 1 for DT_SONAME. All of
-    // these must be added when we actually put the file together because
-    // their values are very dependent on state.
-    return dynamics_.size() + 3;
-  }
-
-  // Create the actual dynamic vector. strsz should be the size of the .dynstr
-  // table and soname_off should be the offset of the soname in .dynstr.
-  // Since niether can be found prior to final layout we will wait until here
-  // to add them.
-  std::vector<Elf_Dyn> GetDynamics(Elf_Word strsz, Elf_Word soname) const {
-    std::vector<Elf_Dyn> ret;
-    for (auto it = dynamics_.cbegin(); it != dynamics_.cend(); ++it) {
-      if (it->section_ != nullptr) {
-        // We are adding an address relative to a section.
-        ret.push_back(
-            {it->tag_, {it->off_ + it->section_->GetSection()->sh_addr}});
-      } else {
-        ret.push_back({it->tag_, {it->off_}});
-      }
-    }
-    ret.push_back({DT_STRSZ, {strsz}});
-    ret.push_back({DT_SONAME, {soname}});
-    ret.push_back({DT_NULL, {0}});
-    return ret;
-  }
-
- private:
-  struct ElfDynamicState {
-    const ElfSectionBuilder<ElfTypes>* section_;
-    Elf_Sword tag_;
-    Elf_Word off_;
-  };
-  std::vector<ElfDynamicState> dynamics_;
-};
-
-template <typename ElfTypes>
-class ElfRawSectionBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-
-  ElfRawSectionBuilder(const std::string& sec_name, Elf_Word type, Elf_Word flags,
-                       const ElfSectionBuilder<ElfTypes>* link, Elf_Word info,
-                       Elf_Word align, Elf_Word entsize)
-    : ElfSectionBuilder<ElfTypes>(sec_name, type, flags, link, info, align, entsize) {
-  }
-  ElfRawSectionBuilder(const ElfRawSectionBuilder&) = default;
-
-  ~ElfRawSectionBuilder() {}
-
-  std::vector<uint8_t>* GetBuffer() {
-    return &buf_;
-  }
-
-  void SetBuffer(const std::vector<uint8_t>& buf) {
-    buf_ = buf;
-  }
-
- private:
-  std::vector<uint8_t> buf_;
-};
-
-template <typename ElfTypes>
-class ElfOatSectionBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Word = typename ElfTypes::Word;
-
-  ElfOatSectionBuilder(const std::string& sec_name, Elf_Word size, Elf_Word offset,
-                       Elf_Word type, Elf_Word flags)
-    : ElfSectionBuilder<ElfTypes>(sec_name, type, flags, nullptr, 0, kPageSize, 0),
-      offset_(offset), size_(size) {
-  }
-
-  ~ElfOatSectionBuilder() {}
-
-  Elf_Word GetOffset() const {
-    return offset_;
-  }
-
-  Elf_Word GetSize() const {
-    return size_;
-  }
-
- private:
-  // Offset of the content within the file.
-  Elf_Word offset_;
-  // Size of the content within the file.
-  Elf_Word size_;
-};
-
-static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
-  return ((binding) << 4) + ((type) & 0xf);
-}
-
-// from bionic
-static inline unsigned elfhash(const char *_name) {
-  const unsigned char *name = (const unsigned char *) _name;
-  unsigned h = 0, g;
-
-  while (*name) {
-    h = (h << 4) + *name++;
-    g = h & 0xf0000000;
-    h ^= g;
-    h ^= g >> 24;
-  }
-  return h;
-}
-
-template <typename ElfTypes>
-class ElfSymtabBuilder FINAL : public ElfSectionBuilder<ElfTypes> {
- public:
-  using Elf_Addr = typename ElfTypes::Addr;
-  using Elf_Word = typename ElfTypes::Word;
-  using Elf_Sym = typename ElfTypes::Sym;
-
-  // Add a symbol with given name to this symtab. The symbol refers to
-  // 'relative_addr' within the given section and has the given attributes.
-  void AddSymbol(const std::string& name,
-                 const ElfSectionBuilder<ElfTypes>* section,
-                 Elf_Addr addr,
-                 bool is_relative,
-                 Elf_Word size,
-                 uint8_t binding,
-                 uint8_t type,
-                 uint8_t other = 0) {
-    CHECK(section);
-    ElfSymtabBuilder::ElfSymbolState state {name, section, addr, size, is_relative,
-                                            MakeStInfo(binding, type), other, 0};
-    symbols_.push_back(state);
-  }
-
-  ElfSymtabBuilder(const std::string& sec_name, Elf_Word type,
-                   const std::string& str_name, Elf_Word str_type, bool alloc)
-  : ElfSectionBuilder<ElfTypes>(sec_name, type, ((alloc) ? SHF_ALLOC : 0U),
-                                &strtab_, 0, sizeof(Elf_Word),
-                                sizeof(Elf_Sym)), str_name_(str_name),
-                                str_type_(str_type),
-                                strtab_(str_name,
-                                        str_type,
-                                        ((alloc) ? SHF_ALLOC : 0U),
-                                        nullptr, 0, 1, 1) {
-  }
-
-  ~ElfSymtabBuilder() {}
-
-  std::vector<Elf_Word> GenerateHashContents() const {
-    // Here is how The ELF hash table works.
-    // There are 3 arrays to worry about.
-    // * The symbol table where the symbol information is.
-    // * The bucket array which is an array of indexes into the symtab and chain.
-    // * The chain array which is also an array of indexes into the symtab and chain.
-    //
-    // Lets say the state is something like this.
-    // +--------+       +--------+      +-----------+
-    // | symtab |       | bucket |      |   chain   |
-    // |  null  |       | 1      |      | STN_UNDEF |
-    // | <sym1> |       | 4      |      | 2         |
-    // | <sym2> |       |        |      | 5         |
-    // | <sym3> |       |        |      | STN_UNDEF |
-    // | <sym4> |       |        |      | 3         |
-    // | <sym5> |       |        |      | STN_UNDEF |
-    // +--------+       +--------+      +-----------+
-    //
-    // The lookup process (in python psudocode) is
-    //
-    // def GetSym(name):
-    //     # NB STN_UNDEF == 0
-    //     indx = bucket[elfhash(name) % num_buckets]
-    //     while indx != STN_UNDEF:
-    //         if GetSymbolName(symtab[indx]) == name:
-    //             return symtab[indx]
-    //         indx = chain[indx]
-    //     return SYMBOL_NOT_FOUND
-    //
-    // Between bucket and chain arrays every symtab index must be present exactly
-    // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
-
-    // Select number of buckets.
-    // This is essentially arbitrary.
-    Elf_Word nbuckets;
-    Elf_Word chain_size = GetSize();
-    if (symbols_.size() < 8) {
-      nbuckets = 2;
-    } else if (symbols_.size() < 32) {
-      nbuckets = 4;
-    } else if (symbols_.size() < 256) {
-      nbuckets = 16;
-    } else {
-      // Have about 32 ids per bucket.
-      nbuckets = RoundUp(symbols_.size()/32, 2);
-    }
-    std::vector<Elf_Word> hash;
-    hash.push_back(nbuckets);
-    hash.push_back(chain_size);
-    uint32_t bucket_offset = hash.size();
-    uint32_t chain_offset = bucket_offset + nbuckets;
-    hash.resize(hash.size() + nbuckets + chain_size, 0);
-
-    Elf_Word* buckets = hash.data() + bucket_offset;
-    Elf_Word* chain   = hash.data() + chain_offset;
-
-    // Set up the actual hash table.
-    for (Elf_Word i = 0; i < symbols_.size(); i++) {
-      // Add 1 since we need to have the null symbol that is not in the symbols
-      // list.
-      Elf_Word index = i + 1;
-      Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols_[i].name_.c_str())) % nbuckets;
-      if (buckets[hash_val] == 0) {
-        buckets[hash_val] = index;
-      } else {
-        hash_val = buckets[hash_val];
-        CHECK_LT(hash_val, chain_size);
-        while (chain[hash_val] != 0) {
-          hash_val = chain[hash_val];
-          CHECK_LT(hash_val, chain_size);
-        }
-        chain[hash_val] = index;
-        // Check for loops. Works because if this is non-empty then there must be
-        // another cell which already contains the same symbol index as this one,
-        // which means some symbol has more then one name, which isn't allowed.
-        CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
-      }
-    }
-
-    return hash;
-  }
-
-  std::string GenerateStrtab() {
-    std::string tab;
-    tab += '\0';
-    for (auto it = symbols_.begin(); it != symbols_.end(); ++it) {
-      it->name_idx_ = tab.size();
-      tab += it->name_;
-      tab += '\0';
-    }
-    strtab_.GetSection()->sh_size = tab.size();
-    return tab;
-  }
-
-  std::vector<Elf_Sym> GenerateSymtab() {
-    std::vector<Elf_Sym> ret;
-    Elf_Sym undef_sym;
-    memset(&undef_sym, 0, sizeof(undef_sym));
-    undef_sym.st_shndx = SHN_UNDEF;
-    ret.push_back(undef_sym);
-
-    for (auto it = symbols_.cbegin(); it != symbols_.cend(); ++it) {
-      Elf_Sym sym;
-      memset(&sym, 0, sizeof(sym));
-      sym.st_name = it->name_idx_;
-      if (it->is_relative_) {
-        sym.st_value = it->addr_ + it->section_->GetSection()->sh_offset;
-      } else {
-        sym.st_value = it->addr_;
-      }
-      sym.st_size = it->size_;
-      sym.st_other = it->other_;
-      sym.st_shndx = it->section_->GetSectionIndex();
-      sym.st_info = it->info_;
-
-      ret.push_back(sym);
-    }
-    return ret;
-  }
-
-  Elf_Word GetSize() const {
-    // 1 is for the implicit null symbol.
-    return symbols_.size() + 1;
-  }
-
-  ElfSectionBuilder<ElfTypes>* GetStrTab() {
-    return &strtab_;
-  }
-
- private:
-  struct ElfSymbolState {
-    const std::string name_;
-    const ElfSectionBuilder<ElfTypes>* section_;
-    Elf_Addr addr_;
-    Elf_Word size_;
-    bool is_relative_;
-    uint8_t info_;
-    uint8_t other_;
-    // Used during Write() to temporarially hold name index in the strtab.
-    Elf_Word name_idx_;
-  };
-
-  // Information for the strsym for dynstr sections.
-  const std::string str_name_;
-  Elf_Word str_type_;
-  // The symbols in the same order they will be in the symbol table.
-  std::vector<ElfSymbolState> symbols_;
-  ElfSectionBuilder<ElfTypes> strtab_;
-};
-
-template <typename Elf_Word>
-class ElfFilePiece {
- public:
-  virtual ~ElfFilePiece() {}
-
-  virtual bool Write(File* elf_file) {
-    if (static_cast<off_t>(offset_) != lseek(elf_file->Fd(), offset_, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek to " << GetDescription() << " offset " << offset_ << " for "
-          << elf_file->GetPath();
-      return false;
-    }
-
-    return DoActualWrite(elf_file);
-  }
-
-  static bool Compare(ElfFilePiece* a, ElfFilePiece* b) {
-    return a->offset_ < b->offset_;
-  }
-
- protected:
-  explicit ElfFilePiece(Elf_Word offset) : offset_(offset) {}
-
-  Elf_Word GetOffset() const {
-    return offset_;
-  }
-
-  virtual const char* GetDescription() const = 0;
-  virtual bool DoActualWrite(File* elf_file) = 0;
-
- private:
-  const Elf_Word offset_;
-
-  DISALLOW_COPY_AND_ASSIGN(ElfFilePiece);
-};
-
-template <typename Elf_Word>
-class ElfFileMemoryPiece FINAL : public ElfFilePiece<Elf_Word> {
- public:
-  ElfFileMemoryPiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size)
-      : ElfFilePiece<Elf_Word>(offset), dbg_name_(name), data_(data), size_(size) {}
-
- protected:
-  bool DoActualWrite(File* elf_file) OVERRIDE {
-    DCHECK(data_ != nullptr || size_ == 0U) << dbg_name_ << " " << size_;
-
-    if (!elf_file->WriteFully(data_, size_)) {
-      PLOG(ERROR) << "Failed to write " << dbg_name_ << " for " << elf_file->GetPath();
-      return false;
-    }
-
-    return true;
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return dbg_name_.c_str();
-  }
-
- private:
-  const std::string& dbg_name_;
-  const void *data_;
-  Elf_Word size_;
-};
-
 class CodeOutput {
  public:
-  virtual void SetCodeOffset(size_t offset) = 0;
   virtual bool Write(OutputStream* out) = 0;
   virtual ~CodeOutput() {}
 };
 
-template <typename Elf_Word>
-class ElfFileRodataPiece FINAL : public ElfFilePiece<Elf_Word> {
- public:
-  ElfFileRodataPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset),
-      output_(output) {}
-
- protected:
-  bool DoActualWrite(File* elf_file) OVERRIDE {
-    output_->SetCodeOffset(this->GetOffset());
-    std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file)));
-    if (!output_->Write(output_stream.get())) {
-      PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file->GetPath();
-      return false;
-    }
-
-    return true;
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return ".rodata";
-  }
-
- private:
-  CodeOutput* const output_;
-
-  DISALLOW_COPY_AND_ASSIGN(ElfFileRodataPiece);
-};
-
-template <typename Elf_Word>
-class ElfFileOatTextPiece FINAL : public ElfFilePiece<Elf_Word> {
- public:
-  ElfFileOatTextPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset),
-      output_(output) {}
-
- protected:
-  bool DoActualWrite(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
-    // All data is written by the ElfFileRodataPiece right now, as the oat writer writes in one
-    // piece. This is for future flexibility.
-    UNUSED(output_);
-    return true;
-  }
-
-  const char* GetDescription() const OVERRIDE {
-    return ".text";
-  }
-
- private:
-  CodeOutput* const output_;
-
-  DISALLOW_COPY_AND_ASSIGN(ElfFileOatTextPiece);
-};
-
-template <typename Elf_Word>
-static bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces, File* elf_file) {
-  // TODO It would be nice if this checked for overlap.
-  for (auto it = pieces.begin(); it != pieces.end(); ++it) {
-    if (!(*it)->Write(elf_file)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <typename Elf_Word, typename Elf_Shdr>
-static inline constexpr Elf_Word NextOffset(const Elf_Shdr& cur, const Elf_Shdr& prev) {
-  return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign);
-}
-
+// Writes an ELF file.
+// The main complication is that the sections often want to reference
+// each other.  We solve this by writing the ELF file in two stages:
+//  * Sections are asked about their size, and overall layout is calculated.
+//  * Sections do the actual writes which may use offsets of other sections.
 template <typename ElfTypes>
 class ElfBuilder FINAL {
  public:
   using Elf_Addr = typename ElfTypes::Addr;
+  using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
   using Elf_Sword = typename ElfTypes::Sword;
   using Elf_Ehdr = typename ElfTypes::Ehdr;
@@ -554,47 +51,481 @@
   using Elf_Phdr = typename ElfTypes::Phdr;
   using Elf_Dyn = typename ElfTypes::Dyn;
 
-  ElfBuilder(CodeOutput* oat_writer,
-             File* elf_file,
-             InstructionSet isa,
-             Elf_Word rodata_relative_offset,
-             Elf_Word rodata_size,
-             Elf_Word text_relative_offset,
-             Elf_Word text_size,
-             Elf_Word bss_relative_offset,
-             Elf_Word bss_size,
-             const bool add_symbols,
-             bool debug = false)
-    : oat_writer_(oat_writer),
-      elf_file_(elf_file),
-      add_symbols_(add_symbols),
-      debug_logging_(debug),
-      text_builder_(".text", text_size, text_relative_offset, SHT_PROGBITS,
-                    SHF_ALLOC | SHF_EXECINSTR),
-      rodata_builder_(".rodata", rodata_size, rodata_relative_offset, SHT_PROGBITS, SHF_ALLOC),
-      bss_builder_(".bss", bss_size, bss_relative_offset, SHT_NOBITS, SHF_ALLOC),
-      dynsym_builder_(".dynsym", SHT_DYNSYM, ".dynstr", SHT_STRTAB, true),
-      symtab_builder_(".symtab", SHT_SYMTAB, ".strtab", SHT_STRTAB, false),
-      hash_builder_(".hash", SHT_HASH, SHF_ALLOC, &dynsym_builder_, 0, sizeof(Elf_Word),
-                    sizeof(Elf_Word)),
-      dynamic_builder_(".dynamic", &dynsym_builder_),
-      shstrtab_builder_(".shstrtab", SHT_STRTAB, 0, nullptr, 0, 1, 1) {
-    SetupEhdr();
-    SetupDynamic();
-    SetupRequiredSymbols();
-    SetISA(isa);
+  // Base class of all sections.
+  class Section {
+   public:
+    Section(const std::string& name, Elf_Word type, Elf_Word flags,
+            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
+        : header_(), section_index_(0), name_(name), link_(link) {
+      header_.sh_type = type;
+      header_.sh_flags = flags;
+      header_.sh_info = info;
+      header_.sh_addralign = align;
+      header_.sh_entsize = entsize;
+    }
+    virtual ~Section() {}
+
+    // Returns the size of the content of this section.  It is used to
+    // calculate file offsets of all sections before doing any writes.
+    virtual Elf_Word GetSize() const = 0;
+
+    // Write the content of this section to the given file.
+    // This must write exactly the number of bytes returned by GetSize().
+    // Offsets of all sections are known when this method is called.
+    virtual bool Write(File* elf_file) = 0;
+
+    Elf_Word GetLink() const {
+      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
+    }
+
+    const Elf_Shdr* GetHeader() const {
+      return &header_;
+    }
+
+    Elf_Shdr* GetHeader() {
+      return &header_;
+    }
+
+    Elf_Word GetSectionIndex() const {
+      DCHECK_NE(section_index_, 0u);
+      return section_index_;
+    }
+
+    void SetSectionIndex(Elf_Word section_index) {
+      section_index_ = section_index;
+    }
+
+    const std::string& GetName() const {
+      return name_;
+    }
+
+   private:
+    Elf_Shdr header_;
+    Elf_Word section_index_;
+    const std::string name_;
+    const Section* const link_;
+
+    DISALLOW_COPY_AND_ASSIGN(Section);
+  };
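To illustrate the two-pass contract above, a minimal hypothetical subclass might look like the following sketch. It is not part of this change; NoteSection and its data are made up, while WriteArray, OVERRIDE and FINAL are the helpers and macros already used by the surrounding code.

  class NoteSection FINAL : public Section {
   public:
    NoteSection(const std::string& name, const std::vector<uint8_t>& data)
        : Section(name, SHT_NOTE, 0, nullptr, 0, 4, 0), data_(data) {}

    // Pass 1: report the exact number of bytes Write() will later emit.
    Elf_Word GetSize() const OVERRIDE { return data_.size(); }

    // Pass 2: offsets/addresses of all sections are already final here.
    bool Write(File* elf_file) OVERRIDE {
      return WriteArray(elf_file, data_.data(), data_.size());
    }

   private:
    std::vector<uint8_t> data_;
  };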
+
+  // Writer of .dynamic section.
+  class DynamicSection FINAL : public Section {
+   public:
+    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
+      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
+      dynamics_.push_back({tag, value, section});
+    }
+
+    DynamicSection(const std::string& name, Section* link)
+        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
+                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
+
+    Elf_Word GetSize() const OVERRIDE {
+      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      std::vector<Elf_Dyn> buffer;
+      buffer.reserve(dynamics_.size() + 1u);
+      for (const ElfDynamicState& it : dynamics_) {
+        if (it.section_ != nullptr) {
+          // We are adding an address relative to a section.
+          buffer.push_back(
+              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
+        } else {
+          buffer.push_back({it.tag_, {it.value_}});
+        }
+      }
+      buffer.push_back({DT_NULL, {0}});
+      return WriteArray(elf_file, buffer.data(), buffer.size());
+    }
+
+   private:
+    struct ElfDynamicState {
+      Elf_Sword tag_;
+      Elf_Word value_;
+      const Section* section_;
+    };
+    std::vector<ElfDynamicState> dynamics_;
+  };
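For illustration, the tags listed in the layout comment further below would be added roughly as in this sketch. The actual population happens in BuildDynamicSection(), whose body is not part of this hunk, so the exact calls here are an assumption.

    // Values paired with a section are rebased onto that section's final
    // sh_addr during Write(), so tags can be added before layout is known.
    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
    // DT_STRSZ and DT_SONAME are added similarly once .dynstr is final.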
+
+  // Section with content based on a simple memory buffer.
+  // The buffer can optionally be patched before writing.
+  // The value written at each patch location can be either an absolute
+  // memory address or an offset relative to the patched location.
+  class RawSection FINAL : public Section {
+   public:
+    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
+               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
+               const Section* patch_base = nullptr, bool patch_relative = false,
+               bool patch_64bit = (sizeof(Elf_Addr) == sizeof(Elf64_Addr)))
+        : Section(name, type, flags, link, info, align, entsize),
+          patched(false), patch_base_(patch_base),
+          patch_relative_(patch_relative), patch_64bit_(patch_64bit) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return buffer_.size();
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      if (!patch_locations_.empty()) {
+        DCHECK(patch_base_ != nullptr);
+        DCHECK(!patched);  // Do not patch twice.
+        if (patch_relative_) {
+          if (patch_64bit_) {
+            Patch<true, uint64_t>();
+          } else {
+            Patch<true, uint32_t>();
+          }
+        } else {
+          if (patch_64bit_) {
+            Patch<false, uint64_t>();
+          } else {
+            Patch<false, uint32_t>();
+          }
+        }
+        patched = true;
+      }
+      return WriteArray(elf_file, buffer_.data(), buffer_.size());
+    }
+
+    bool IsEmpty() const {
+      return buffer_.size() == 0;
+    }
+
+    std::vector<uint8_t>* GetBuffer() {
+      return &buffer_;
+    }
+
+    void SetBuffer(const std::vector<uint8_t>& buffer) {
+      buffer_ = buffer;
+    }
+
+    std::vector<uintptr_t>* GetPatchLocations() {
+      return &patch_locations_;
+    }
+
+   private:
+    template <bool RelativeAddress = false, typename PatchedAddress = Elf_Addr>
+    void Patch() {
+      Elf_Addr base_addr = patch_base_->GetHeader()->sh_addr;
+      Elf_Addr addr = this->GetHeader()->sh_addr;
+      for (uintptr_t patch_location : patch_locations_) {
+        typedef __attribute__((__aligned__(1))) PatchedAddress UnalignedAddress;
+        auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer_.data() + patch_location);
+        *to_patch = (base_addr + *to_patch) - (RelativeAddress ? (addr + patch_location) : 0);
+      }
+    }
+
+    std::vector<uint8_t> buffer_;
+    std::vector<uintptr_t> patch_locations_;
+    bool patched;
+    const Section* patch_base_;
+    bool patch_relative_;
+    bool patch_64bit_;
+  };
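A worked example of the Patch() arithmetic above, with hypothetical numbers:

    // Suppose .text (patch_base_) is laid out at sh_addr 0x5000, this raw
    // section at sh_addr 0x2000, and buffer_[0x10] holds the value 0x30
    // (an offset into .text recorded before layout was known).
    //   absolute:  0x5000 + 0x30                     = 0x5030
    //   relative:  (0x5000 + 0x30) - (0x2000 + 0x10) = 0x3020
    // The relative form is what pc-relative encodings (for example .eh_frame
    // pointers into .text) need.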
+
+  // Writer of .rodata section or .text section.
+  // The write is done lazily using the provided CodeOutput.
+  class OatSection FINAL : public Section {
+   public:
+    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
+               const Section* link, Elf_Word info, Elf_Word align,
+               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
+        : Section(name, type, flags, link, info, align, entsize),
+          size_(size), code_output_(code_output) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return size_;
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      // The BufferedOutputStream class contains the buffer as a field,
+      // therefore it is too big to allocate on the stack.
+      std::unique_ptr<BufferedOutputStream> output_stream(
+          new BufferedOutputStream(new FileOutputStream(elf_file)));
+      return code_output_->Write(output_stream.get());
+    }
+
+   private:
+    Elf_Word size_;
+    CodeOutput* code_output_;
+  };
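A minimal CodeOutput to pair with OatSection might look like this sketch. It is hypothetical (the real implementation is the oat writer) and assumes OutputStream::WriteFully as used elsewhere in the compiler; the key point is that it must emit exactly the number of bytes passed to OatSection as its size.

  class PaddingOutput FINAL : public CodeOutput {
   public:
    explicit PaddingOutput(size_t size) : size_(size) {}
    bool Write(OutputStream* out) OVERRIDE {
      std::vector<uint8_t> zeroes(size_, 0u);
      return out->WriteFully(zeroes.data(), zeroes.size());
    }
   private:
    const size_t size_;
  };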
+
+  // Writer of .bss section.
+  class NoBitsSection FINAL : public Section {
+   public:
+    NoBitsSection(const std::string& name, Elf_Word size)
+        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+          size_(size) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return size_;
+    }
+
+    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(ERROR) << "This section should not be written to the ELF file";
+      return false;
+    }
+
+   private:
+    Elf_Word size_;
+  };
+
+  // Writer of the .dynstr, .strtab and .shstrtab sections.
+  class StrtabSection FINAL : public Section {
+   public:
+    StrtabSection(const std::string& name, Elf_Word flags)
+        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) {
+      buffer_.reserve(4 * KB);
+      // The first entry of the strtab must be an empty string.
+      buffer_ += '\0';
+    }
+
+    Elf_Word AddName(const std::string& name) {
+      Elf_Word offset = buffer_.size();
+      buffer_ += name;
+      buffer_ += '\0';
+      return offset;
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return buffer_.size();
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      return WriteArray(elf_file, buffer_.data(), buffer_.size());
+    }
+
+   private:
+    std::string buffer_;
+  };
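A short worked example of the offsets AddName() hands out; the local variables are hypothetical, the names are the oat symbols from the layout comment below.

    StrtabSection dynstr(".dynstr", SHF_ALLOC);    // table = "\0"             (size 1)
    Elf_Word oatdata = dynstr.AddName("oatdata");  // 1, table = "\0oatdata\0" (size 9)
    Elf_Word oatexec = dynstr.AddName("oatexec");  // 9, table size is now 17
    // These offsets become st_name values in .dynsym; the SONAME string is
    // added to the table in the same way.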
+
+  class HashSection;
+
+  // Writer of .dynsym and .symtab sections.
+  class SymtabSection FINAL : public Section {
+   public:
+    // Add a symbol with the given name to this symtab. The symbol refers to
+    // 'addr' within the given section and has the given attributes.
+    void AddSymbol(const std::string& name, const Section* section,
+                   Elf_Addr addr, bool is_relative, Elf_Word size,
+                   uint8_t binding, uint8_t type, uint8_t other = 0) {
+      CHECK(section != nullptr);
+      Elf_Word name_idx = strtab_->AddName(name);
+      symbols_.push_back({ name, section, addr, size, is_relative,
+                           MakeStInfo(binding, type), other, name_idx });
+    }
+
+    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
+                  StrtabSection* strtab)
+        : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)),
+          strtab_(strtab) {
+    }
+
+    bool IsEmpty() const {
+      return symbols_.empty();
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
+    }
+
+    bool Write(File* elf_file) OVERRIDE {
+      std::vector<Elf_Sym> buffer;
+      buffer.reserve(1u + symbols_.size());
+      buffer.push_back(Elf_Sym());  // NULL.
+      for (const ElfSymbolState& it : symbols_) {
+        Elf_Sym sym = Elf_Sym();
+        sym.st_name = it.name_idx_;
+        if (it.is_relative_) {
+          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
+        } else {
+          sym.st_value = it.addr_;
+        }
+        sym.st_size = it.size_;
+        sym.st_other = it.other_;
+        sym.st_shndx = it.section_->GetSectionIndex();
+        sym.st_info = it.info_;
+        buffer.push_back(sym);
+      }
+      return WriteArray(elf_file, buffer.data(), buffer.size());
+    }
+
+   private:
+    struct ElfSymbolState {
+      const std::string name_;
+      const Section* section_;
+      Elf_Addr addr_;
+      Elf_Word size_;
+      bool is_relative_;
+      uint8_t info_;
+      uint8_t other_;
+      Elf_Word name_idx_;  // index in the strtab.
+    };
+
+    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
+      return ((binding) << 4) + ((type) & 0xf);
+    }
+
+    // The symbols in the same order they will be in the symbol table.
+    std::vector<ElfSymbolState> symbols_;
+    StrtabSection* strtab_;
+
+    friend class HashSection;
+  };
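For illustration, the required oat symbols could be registered as in the sketch below. This is an assumption about what BuildDynsymSection(), called from Write() further down, does; it is not shown in this hunk.

    dynsym_.AddSymbol("oatdata", &rodata_, 0u, true, rodata_.GetSize(),
                      STB_GLOBAL, STT_OBJECT);
    dynsym_.AddSymbol("oatexec", &text_, 0u, true, text_.GetSize(),
                      STB_GLOBAL, STT_OBJECT);
    // 'addr' is relative here (is_relative == true), so st_value becomes
    // addr + the section's sh_addr once the final layout is known.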
+
+  // TODO: Consider removing.
+  // We use it only for the dynsym section which has only 5 symbols.
+  // We do not use it for symtab, and we probably do not have to
+  // since we use those symbols only to print backtraces.
+  class HashSection FINAL : public Section {
+   public:
+    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
+        : Section(name, SHT_HASH, flags, symtab,
+                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
+          symtab_(symtab) {
+    }
+
+    Elf_Word GetSize() const OVERRIDE {
+      Elf_Word nbuckets = GetNumBuckets();
+      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
+      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
+    }
+
+    bool Write(File* const elf_file) OVERRIDE {
+      // Here is how the ELF hash table works.
+      // There are 3 arrays to worry about.
+      // * The symbol table where the symbol information is.
+      // * The bucket array which is an array of indexes into the symtab and chain.
+      // * The chain array which is also an array of indexes into the symtab and chain.
+      //
+      // Let's say the state is something like this.
+      // +--------+       +--------+      +-----------+
+      // | symtab |       | bucket |      |   chain   |
+      // |  null  |       | 1      |      | STN_UNDEF |
+      // | <sym1> |       | 4      |      | 2         |
+      // | <sym2> |       |        |      | 5         |
+      // | <sym3> |       |        |      | STN_UNDEF |
+      // | <sym4> |       |        |      | 3         |
+      // | <sym5> |       |        |      | STN_UNDEF |
+      // +--------+       +--------+      +-----------+
+      //
+      // The lookup process (in Python pseudocode) is
+      //
+      // def GetSym(name):
+      //     # NB STN_UNDEF == 0
+      //     indx = bucket[elfhash(name) % num_buckets]
+      //     while indx != STN_UNDEF:
+      //         if GetSymbolName(symtab[indx]) == name:
+      //             return symtab[indx]
+      //         indx = chain[indx]
+      //     return SYMBOL_NOT_FOUND
+      //
+      // Between bucket and chain arrays every symtab index must be present exactly
+      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
+      const auto& symbols = symtab_->symbols_;
+      // Select number of buckets.
+      // This is essentially arbitrary.
+      Elf_Word nbuckets = GetNumBuckets();
+      // 1 is for the implicit NULL symbol.
+      Elf_Word chain_size = (symbols.size() + 1);
+      std::vector<Elf_Word> hash;
+      hash.push_back(nbuckets);
+      hash.push_back(chain_size);
+      uint32_t bucket_offset = hash.size();
+      uint32_t chain_offset = bucket_offset + nbuckets;
+      hash.resize(hash.size() + nbuckets + chain_size, 0);
+
+      Elf_Word* buckets = hash.data() + bucket_offset;
+      Elf_Word* chain   = hash.data() + chain_offset;
+
+      // Set up the actual hash table.
+      for (Elf_Word i = 0; i < symbols.size(); i++) {
+        // Add 1 since we need to have the null symbol that is not in the symbols
+        // list.
+        Elf_Word index = i + 1;
+        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
+        if (buckets[hash_val] == 0) {
+          buckets[hash_val] = index;
+        } else {
+          hash_val = buckets[hash_val];
+          CHECK_LT(hash_val, chain_size);
+          while (chain[hash_val] != 0) {
+            hash_val = chain[hash_val];
+            CHECK_LT(hash_val, chain_size);
+          }
+          chain[hash_val] = index;
+          // Check for loops. Works because if this is non-empty then there must be
+          // another cell which already contains the same symbol index as this one,
+          // which means some symbol has more than one name, which isn't allowed.
+          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
+        }
+      }
+      return WriteArray(elf_file, hash.data(), hash.size());
+    }
+
+   private:
+    Elf_Word GetNumBuckets() const {
+      const auto& symbols = symtab_->symbols_;
+      if (symbols.size() < 8) {
+        return 2;
+      } else if (symbols.size() < 32) {
+        return 4;
+      } else if (symbols.size() < 256) {
+        return 16;
+      } else {
+        // Have about 32 ids per bucket.
+        return RoundUp(symbols.size()/32, 2);
+      }
+    }
+
+    // from bionic
+    static inline unsigned elfhash(const char *_name) {
+      const unsigned char *name = (const unsigned char *) _name;
+      unsigned h = 0, g;
+
+      while (*name) {
+        h = (h << 4) + *name++;
+        g = h & 0xf0000000;
+        h ^= g;
+        h ^= g >> 24;
+      }
+      return h;
+    }
+
+    SymtabSection* symtab_;
+
+    DISALLOW_COPY_AND_ASSIGN(HashSection);
+  };
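The consumer side of this table, i.e. the lookup the dynamic linker performs against the nbucket/nchain layout written above, can be sketched as a standalone, hypothetical function; 'hash_fn' is the bionic elfhash and 'symbol_name' stands in for resolving st_name through .dynstr (assumes <cstring> for strcmp).

    template <typename Word, typename NameFn>
    Word LookupSymbol(const Word* hash, const char* name,
                      unsigned (*hash_fn)(const char*), NameFn symbol_name) {
      Word nbuckets = hash[0];
      const Word* buckets = hash + 2;   // skip the nbucket/nchain header
      const Word* chain = buckets + nbuckets;
      for (Word i = buckets[hash_fn(name) % nbuckets];
           i != 0 /* STN_UNDEF */; i = chain[i]) {
        if (strcmp(symbol_name(i), name) == 0) {
          return i;                     // index into .dynsym
        }
      }
      return 0;                         // not found
    }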
+
+  ElfBuilder(InstructionSet isa,
+             Elf_Word rodata_size, CodeOutput* rodata_writer,
+             Elf_Word text_size, CodeOutput* text_writer,
+             Elf_Word bss_size)
+    : isa_(isa),
+      dynstr_(".dynstr", SHF_ALLOC),
+      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+      hash_(".hash", SHF_ALLOC, &dynsym_),
+      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
+              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
+      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
+            nullptr, 0, kPageSize, 0, text_size, text_writer),
+      bss_(".bss", bss_size),
+      dynamic_(".dynamic", &dynsym_),
+      strtab_(".strtab", 0),
+      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
+      shstrtab_(".shstrtab", 0) {
   }
   ~ElfBuilder() {}
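Putting it together, typical use of the new builder mirrors the dwarf test at the top of this change. In the sketch below only the builder API is taken from this patch; the output objects, sizes and the output file are hypothetical.

  // NoCodeOutput is a stand-in for the oat writer's CodeOutput.
  NoCodeOutput rodata_out, text_out;
  ElfBuilder<ElfTypes32> builder(kArm,
                                 rodata_size, &rodata_out,
                                 text_size, &text_out,
                                 bss_size);
  builder.GetSymtab()->AddSymbol("MyMethod", builder.GetText(), 0u, true,
                                 method_size, STB_GLOBAL, STT_FUNC);
  ElfBuilder<ElfTypes32>::RawSection eh_frame(
      ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0);
  builder.RegisterSection(&eh_frame);    // extra sections are optional
  bool ok = builder.Write(output_file);  // layout first, then all writes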
 
-  const ElfOatSectionBuilder<ElfTypes>& GetTextBuilder() const {
-    return text_builder_;
-  }
+  OatSection* GetText() { return &text_; }
+  SymtabSection* GetSymtab() { return &symtab_; }
 
-  ElfSymtabBuilder<ElfTypes>* GetSymtabBuilder() {
-    return &symtab_builder_;
-  }
-
-  bool Init() {
+  bool Write(File* elf_file) {
     // Since the .text section of an oat file contains relative references to .rodata
     // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
     // a non-traditional layout where the .bss section is mapped independently of the
@@ -605,11 +536,12 @@
     // | Elf_Ehdr                |
     // +-------------------------+
     // | Elf_Phdr PHDR           |
-    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .eh_frame .eh_frame_hdr .rodata
+    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
     // | Elf_Phdr LOAD R X       | .text
     // | Elf_Phdr LOAD RW        | .bss (Optional)
     // | Elf_Phdr LOAD RW        | .dynamic
     // | Elf_Phdr DYNAMIC        | .dynamic
+    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
     // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
     // +-------------------------+
     // | .dynsym                 |
@@ -621,25 +553,10 @@
     // | Elf_Sym  oatbsslastword | (Optional)
     // +-------------------------+
     // | .dynstr                 |
-    // | \0                      |
-    // | oatdata\0               |
-    // | oatexec\0               |
-    // | oatlastword\0           |
-    // | boot.oat\0              |
+    // | names for .dynsym       |
     // +-------------------------+
     // | .hash                   |
-    // | Elf_Word nbucket = b    |
-    // | Elf_Word nchain  = c    |
-    // | Elf_Word bucket[0]      |
-    // |         ...             |
-    // | Elf_Word bucket[b - 1]  |
-    // | Elf_Word chain[0]       |
-    // |         ...             |
-    // | Elf_Word chain[c - 1]   |
-    // +-------------------------+
-    // | .eh_frame               |  (Optional)
-    // +-------------------------+
-    // | .eh_frame_hdr           |  (Optional)
+    // | hashtable for dynsym    |
     // +-------------------------+
     // | .rodata                 |
     // | oatdata..oatexec-4      |
@@ -648,38 +565,23 @@
     // | oatexec..oatlastword    |
     // +-------------------------+
     // | .dynamic                |
-    // | Elf_Dyn DT_SONAME       |
     // | Elf_Dyn DT_HASH         |
+    // | Elf_Dyn DT_STRTAB       |
     // | Elf_Dyn DT_SYMTAB       |
     // | Elf_Dyn DT_SYMENT       |
-    // | Elf_Dyn DT_STRTAB       |
     // | Elf_Dyn DT_STRSZ        |
+    // | Elf_Dyn DT_SONAME       |
     // | Elf_Dyn DT_NULL         |
     // +-------------------------+  (Optional)
-    // | .strtab                 |  (Optional)
-    // | program symbol names    |  (Optional)
-    // +-------------------------+  (Optional)
     // | .symtab                 |  (Optional)
     // | program symbols         |  (Optional)
-    // +-------------------------+
-    // | .shstrtab               |
-    // | \0                      |
-    // | .dynamic\0              |
-    // | .dynsym\0               |
-    // | .dynstr\0               |
-    // | .hash\0                 |
-    // | .rodata\0               |
-    // | .text\0                 |
-    // | .bss\0                  |  (Optional)
-    // | .shstrtab\0             |
-    // | .symtab\0               |  (Optional)
-    // | .strtab\0               |  (Optional)
-    // | .eh_frame\0             |  (Optional)
-    // | .eh_frame_hdr\0         |  (Optional)
-    // | .debug_info\0           |  (Optional)
-    // | .debug_abbrev\0         |  (Optional)
-    // | .debug_str\0            |  (Optional)
-    // | .debug_line\0           |  (Optional)
+    // +-------------------------+  (Optional)
+    // | .strtab                 |  (Optional)
+    // | names for .symtab       |  (Optional)
+    // +-------------------------+  (Optional)
+    // | .eh_frame               |  (Optional)
+    // +-------------------------+  (Optional)
+    // | .eh_frame_hdr           |  (Optional)
     // +-------------------------+  (Optional)
     // | .debug_info             |  (Optional)
     // +-------------------------+  (Optional)
@@ -688,7 +590,10 @@
     // | .debug_str              |  (Optional)
     // +-------------------------+  (Optional)
     // | .debug_line             |  (Optional)
-    // +-------------------------+  (Optional)
+    // +-------------------------+
+    // | .shstrtab               |
+    // | names of sections       |
+    // +-------------------------+
     // | Elf_Shdr null           |
     // | Elf_Shdr .dynsym        |
     // | Elf_Shdr .dynstr        |
@@ -697,552 +602,266 @@
     // | Elf_Shdr .text          |
     // | Elf_Shdr .bss           |  (Optional)
     // | Elf_Shdr .dynamic       |
-    // | Elf_Shdr .shstrtab      |
+    // | Elf_Shdr .symtab        |  (Optional)
+    // | Elf_Shdr .strtab        |  (Optional)
     // | Elf_Shdr .eh_frame      |  (Optional)
     // | Elf_Shdr .eh_frame_hdr  |  (Optional)
     // | Elf_Shdr .debug_info    |  (Optional)
     // | Elf_Shdr .debug_abbrev  |  (Optional)
     // | Elf_Shdr .debug_str     |  (Optional)
     // | Elf_Shdr .debug_line    |  (Optional)
+    // | Elf_Shdr .oat_patches   |  (Optional)
+    // | Elf_Shdr .shstrtab      |
     // +-------------------------+
+    constexpr bool debug_logging_ = false;
 
-    if (fatal_error_) {
-      return false;
+    // Create a list of all sections which we want to write.
+    // This is the order in which they will be written.
+    std::vector<Section*> sections;
+    sections.push_back(&dynsym_);
+    sections.push_back(&dynstr_);
+    sections.push_back(&hash_);
+    sections.push_back(&rodata_);
+    sections.push_back(&text_);
+    if (bss_.GetSize() != 0u) {
+      sections.push_back(&bss_);
     }
-    // Step 1. Figure out all the offsets.
-
-    if (debug_logging_) {
-      LOG(INFO) << "phdr_offset=" << PHDR_OFFSET << std::hex << " " << PHDR_OFFSET;
-      LOG(INFO) << "phdr_size=" << PHDR_SIZE << std::hex << " " << PHDR_SIZE;
+    sections.push_back(&dynamic_);
+    if (!symtab_.IsEmpty()) {
+      sections.push_back(&symtab_);
+      sections.push_back(&strtab_);
+    }
+    for (Section* section : other_sections_) {
+      sections.push_back(section);
+    }
+    sections.push_back(&shstrtab_);
+    for (size_t i = 0; i < sections.size(); i++) {
+      // The first section index is 1.  Index 0 is reserved for NULL.
+      // Section index is used for relative symbols and for section links.
+      sections[i]->SetSectionIndex(i + 1);
+      // Add section name to .shstrtab.
+      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
+      sections[i]->GetHeader()->sh_name = name_offset;
     }
 
-    memset(&program_headers_, 0, sizeof(program_headers_));
-    program_headers_[PH_PHDR].p_type    = PT_PHDR;
-    program_headers_[PH_PHDR].p_offset  = PHDR_OFFSET;
-    program_headers_[PH_PHDR].p_vaddr   = PHDR_OFFSET;
-    program_headers_[PH_PHDR].p_paddr   = PHDR_OFFSET;
-    program_headers_[PH_PHDR].p_filesz  = sizeof(program_headers_);
-    program_headers_[PH_PHDR].p_memsz   = sizeof(program_headers_);
-    program_headers_[PH_PHDR].p_flags   = PF_R;
-    program_headers_[PH_PHDR].p_align   = sizeof(Elf_Word);
+    // The running program does not have access to section headers
+    // and the loader is not supposed to use them either.
+    // The dynamic section therefore replicates some of the layout
+    // information like the address and size of .rodata and .text.
+    // It also contains other metadata like the SONAME.
+    // The .dynamic section is found using the PT_DYNAMIC program header.
+    BuildDynsymSection();
+    BuildDynamicSection(elf_file->GetPath());
 
-    program_headers_[PH_LOAD_R__].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_R__].p_offset  = 0;
-    program_headers_[PH_LOAD_R__].p_vaddr   = 0;
-    program_headers_[PH_LOAD_R__].p_paddr   = 0;
-    program_headers_[PH_LOAD_R__].p_flags   = PF_R;
+    // We do not know the number of program headers until the final stages of the write.
+    // It is easiest to just reserve a fixed amount of space for them.
+    constexpr size_t kMaxProgramHeaders = 8;
+    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
+    constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders;
 
-    program_headers_[PH_LOAD_R_X].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_R_X].p_flags   = PF_R | PF_X;
-
-    program_headers_[PH_LOAD_RW_BSS].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_RW_BSS].p_flags   = PF_R | PF_W;
-
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_type    = PT_LOAD;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_flags   = PF_R | PF_W;
-
-    program_headers_[PH_DYNAMIC].p_type    = PT_DYNAMIC;
-    program_headers_[PH_DYNAMIC].p_flags   = PF_R | PF_W;
-
-    program_headers_[PH_EH_FRAME_HDR].p_type = PT_NULL;
-    program_headers_[PH_EH_FRAME_HDR].p_flags = PF_R;
-
-    // Get the dynstr string.
-    dynstr_ = dynsym_builder_.GenerateStrtab();
-
-    // Add the SONAME to the dynstr.
-    dynstr_soname_offset_ = dynstr_.size();
-    std::string file_name(elf_file_->GetPath());
-    size_t directory_separator_pos = file_name.rfind('/');
-    if (directory_separator_pos != std::string::npos) {
-      file_name = file_name.substr(directory_separator_pos + 1);
-    }
-    dynstr_ += file_name;
-    dynstr_ += '\0';
-    if (debug_logging_) {
-      LOG(INFO) << "dynstr size (bytes)   =" << dynstr_.size()
-                << std::hex << " " << dynstr_.size();
-      LOG(INFO) << "dynsym size (elements)=" << dynsym_builder_.GetSize()
-                << std::hex << " " << dynsym_builder_.GetSize();
-    }
-
-    // Get the section header string table.
-    shstrtab_ += '\0';
-
-    // Setup sym_undef
-    memset(&null_hdr_, 0, sizeof(null_hdr_));
-    null_hdr_.sh_type = SHT_NULL;
-    null_hdr_.sh_link = SHN_UNDEF;
-    section_ptrs_.push_back(&null_hdr_);
-
-    section_index_ = 1;
-
-    // setup .dynsym
-    section_ptrs_.push_back(dynsym_builder_.GetSection());
-    AssignSectionStr(&dynsym_builder_, &shstrtab_);
-    dynsym_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .dynstr
-    section_ptrs_.push_back(dynsym_builder_.GetStrTab()->GetSection());
-    AssignSectionStr(dynsym_builder_.GetStrTab(), &shstrtab_);
-    dynsym_builder_.GetStrTab()->SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .hash
-    section_ptrs_.push_back(hash_builder_.GetSection());
-    AssignSectionStr(&hash_builder_, &shstrtab_);
-    hash_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .rodata
-    section_ptrs_.push_back(rodata_builder_.GetSection());
-    AssignSectionStr(&rodata_builder_, &shstrtab_);
-    rodata_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .text
-    section_ptrs_.push_back(text_builder_.GetSection());
-    AssignSectionStr(&text_builder_, &shstrtab_);
-    text_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Setup .bss
-    if (bss_builder_.GetSize() != 0u) {
-      section_ptrs_.push_back(bss_builder_.GetSection());
-      AssignSectionStr(&bss_builder_, &shstrtab_);
-      bss_builder_.SetSectionIndex(section_index_);
-      section_index_++;
-    }
-
-    // Setup .dynamic
-    section_ptrs_.push_back(dynamic_builder_.GetSection());
-    AssignSectionStr(&dynamic_builder_, &shstrtab_);
-    dynamic_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    // Fill in the hash section.
-    hash_ = dynsym_builder_.GenerateHashContents();
-
-    if (debug_logging_) {
-      LOG(INFO) << ".hash size (bytes)=" << hash_.size() * sizeof(Elf_Word)
-                << std::hex << " " << hash_.size() * sizeof(Elf_Word);
-    }
-
-    Elf_Word base_offset = sizeof(Elf_Ehdr) + sizeof(program_headers_);
-
-    // Get the layout in the sections.
-    //
-    // Get the layout of the dynsym section.
-    dynsym_builder_.GetSection()->sh_offset =
-        RoundUp(base_offset, dynsym_builder_.GetSection()->sh_addralign);
-    dynsym_builder_.GetSection()->sh_addr = dynsym_builder_.GetSection()->sh_offset;
-    dynsym_builder_.GetSection()->sh_size = dynsym_builder_.GetSize() * sizeof(Elf_Sym);
-    dynsym_builder_.GetSection()->sh_link = dynsym_builder_.GetLink();
-
-    // Get the layout of the dynstr section.
-    dynsym_builder_.GetStrTab()->GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*dynsym_builder_.GetStrTab()->GetSection(),
-                                       *dynsym_builder_.GetSection());
-    dynsym_builder_.GetStrTab()->GetSection()->sh_addr =
-        dynsym_builder_.GetStrTab()->GetSection()->sh_offset;
-    dynsym_builder_.GetStrTab()->GetSection()->sh_size = dynstr_.size();
-    dynsym_builder_.GetStrTab()->GetSection()->sh_link = dynsym_builder_.GetStrTab()->GetLink();
-
-    // Get the layout of the hash section
-    hash_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*hash_builder_.GetSection(),
-                                       *dynsym_builder_.GetStrTab()->GetSection());
-    hash_builder_.GetSection()->sh_addr = hash_builder_.GetSection()->sh_offset;
-    hash_builder_.GetSection()->sh_size = hash_.size() * sizeof(Elf_Word);
-    hash_builder_.GetSection()->sh_link = hash_builder_.GetLink();
-
-    // Get the layout of the extra sections with SHF_ALLOC flag.
-    // This will deal with .eh_frame and .eh_frame_hdr.
-    // .eh_frame contains relative pointers to .text which we
-    // want to fixup between the calls to Init() and Write().
-    // Therefore we handle those sections here as opposed to Write().
-    // It also has the nice side effect of including .eh_frame
-    // with the rest of LOAD_R segment.  It must come before .rodata
-    // because .rodata and .text must be next to each other.
-    Elf_Shdr* prev = hash_builder_.GetSection();
-    for (auto* it : other_builders_) {
-      if ((it->GetSection()->sh_flags & SHF_ALLOC) != 0) {
-        it->GetSection()->sh_offset = NextOffset<Elf_Word, Elf_Shdr>(*it->GetSection(), *prev);
-        it->GetSection()->sh_addr = it->GetSection()->sh_offset;
-        it->GetSection()->sh_size = it->GetBuffer()->size();
-        it->GetSection()->sh_link = it->GetLink();
-        prev = it->GetSection();
+    // Layout of all sections - determine the final file offsets and addresses.
+    // This must be done after we have built all sections and know their size.
+    Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize;
+    Elf_Addr load_address = file_offset;
+    std::vector<Elf_Shdr> section_headers;
+    section_headers.reserve(1u + sections.size());
+    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
+    for (auto* section : sections) {
+      Elf_Shdr* header = section->GetHeader();
+      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
+      header->sh_size = section->GetSize();
+      header->sh_link = section->GetLink();
+      // Allocate memory for the section in the file.
+      if (header->sh_type != SHT_NOBITS) {
+        header->sh_offset = RoundUp(file_offset, alignment);
+        file_offset = header->sh_offset + header->sh_size;
       }
+      // Allocate memory for the section during program execution.
+      if ((header->sh_flags & SHF_ALLOC) != 0) {
+        header->sh_addr = RoundUp(load_address, alignment);
+        load_address = header->sh_addr + header->sh_size;
+      }
+      if (debug_logging_) {
+        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
+                  << " offset=0x" << header->sh_offset
+                  << " addr=0x" << header->sh_addr
+                  << " size=0x" << header->sh_size;
+      }
+      // Collect section headers into a contiguous array for convenience.
+      section_headers.push_back(*header);
     }
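A worked example of one iteration of the layout loop above, with hypothetical numbers:

    // Assume file_offset == 0x10f4 when .rodata (sh_addralign == kPageSize,
    // i.e. 0x1000) is reached:
    //   sh_offset   = RoundUp(0x10f4, 0x1000) = 0x2000
    //   file_offset = 0x2000 + sh_size
    // load_address advances the same way for SHF_ALLOC sections, while a
    // SHT_NOBITS section such as .bss consumes address space only and leaves
    // file_offset untouched.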
-    // If the sections exist, check that they have been handled.
-    const auto* eh_frame = FindRawSection(".eh_frame");
+    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word));
+
+    // Create program headers now that we know the layout of the whole file.
+    // Each segment contains one or more sections which are mapped together.
+    // Not all sections are mapped during the execution of the program.
+    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
+    // interesting parts of memory and their addresses overlap with PT_LOAD.
+    std::vector<Elf_Phdr> program_headers;
+    program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R,
+      kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word)));
+    // Create the main LOAD R segment which spans all sections up to and including .rodata.
+    const Elf_Shdr* rodata = rodata_.GetHeader();
+    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
+      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
+    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
+    if (bss_.GetHeader()->sh_size != 0u) {
+      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
+    }
+    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
+    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
+    const Section* eh_frame = FindSection(".eh_frame");
     if (eh_frame != nullptr) {
-      DCHECK_NE(eh_frame->GetSection()->sh_offset, 0u);
-    }
-    const auto* eh_frame_hdr = FindRawSection(".eh_frame_hdr");
-    if (eh_frame_hdr != nullptr) {
-      DCHECK_NE(eh_frame_hdr->GetSection()->sh_offset, 0u);
-    }
-
-    // Get the layout of the rodata section.
-    rodata_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*rodata_builder_.GetSection(), *prev);
-    rodata_builder_.GetSection()->sh_addr = rodata_builder_.GetSection()->sh_offset;
-    rodata_builder_.GetSection()->sh_size = rodata_builder_.GetSize();
-    rodata_builder_.GetSection()->sh_link = rodata_builder_.GetLink();
-
-    // Get the layout of the text section.
-    text_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*text_builder_.GetSection(),
-                                       *rodata_builder_.GetSection());
-    text_builder_.GetSection()->sh_addr = text_builder_.GetSection()->sh_offset;
-    text_builder_.GetSection()->sh_size = text_builder_.GetSize();
-    text_builder_.GetSection()->sh_link = text_builder_.GetLink();
-    CHECK_ALIGNED(rodata_builder_.GetSection()->sh_offset +
-                  rodata_builder_.GetSection()->sh_size, kPageSize);
-
-    // Get the layout of the .bss section.
-    bss_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*bss_builder_.GetSection(),
-                                       *text_builder_.GetSection());
-    bss_builder_.GetSection()->sh_addr = bss_builder_.GetSection()->sh_offset;
-    bss_builder_.GetSection()->sh_size = bss_builder_.GetSize();
-    bss_builder_.GetSection()->sh_link = bss_builder_.GetLink();
-
-    // Get the layout of the dynamic section.
-    CHECK(IsAlignedParam(bss_builder_.GetSection()->sh_offset,
-                         dynamic_builder_.GetSection()->sh_addralign));
-    dynamic_builder_.GetSection()->sh_offset = bss_builder_.GetSection()->sh_offset;
-    dynamic_builder_.GetSection()->sh_addr =
-        NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *bss_builder_.GetSection());
-    dynamic_builder_.GetSection()->sh_size = dynamic_builder_.GetSize() * sizeof(Elf_Dyn);
-    dynamic_builder_.GetSection()->sh_link = dynamic_builder_.GetLink();
-
-    if (debug_logging_) {
-      LOG(INFO) << "dynsym off=" << dynsym_builder_.GetSection()->sh_offset
-                << " dynsym size=" << dynsym_builder_.GetSection()->sh_size;
-      LOG(INFO) << "dynstr off=" << dynsym_builder_.GetStrTab()->GetSection()->sh_offset
-                << " dynstr size=" << dynsym_builder_.GetStrTab()->GetSection()->sh_size;
-      LOG(INFO) << "hash off=" << hash_builder_.GetSection()->sh_offset
-                << " hash size=" << hash_builder_.GetSection()->sh_size;
-      LOG(INFO) << "rodata off=" << rodata_builder_.GetSection()->sh_offset
-                << " rodata size=" << rodata_builder_.GetSection()->sh_size;
-      LOG(INFO) << "text off=" << text_builder_.GetSection()->sh_offset
-                << " text size=" << text_builder_.GetSection()->sh_size;
-      LOG(INFO) << "dynamic off=" << dynamic_builder_.GetSection()->sh_offset
-                << " dynamic size=" << dynamic_builder_.GetSection()->sh_size;
-    }
-
-    return true;
-  }
-
-  bool Write() {
-    std::vector<ElfFilePiece<Elf_Word>*> pieces;
-    Elf_Shdr* prev = dynamic_builder_.GetSection();
-    std::string strtab;
-
-    if (IncludingDebugSymbols()) {
-      // Setup .symtab
-      section_ptrs_.push_back(symtab_builder_.GetSection());
-      AssignSectionStr(&symtab_builder_, &shstrtab_);
-      symtab_builder_.SetSectionIndex(section_index_);
-      section_index_++;
-
-      // Setup .strtab
-      section_ptrs_.push_back(symtab_builder_.GetStrTab()->GetSection());
-      AssignSectionStr(symtab_builder_.GetStrTab(), &shstrtab_);
-      symtab_builder_.GetStrTab()->SetSectionIndex(section_index_);
-      section_index_++;
-
-      strtab = symtab_builder_.GenerateStrtab();
-      if (debug_logging_) {
-        LOG(INFO) << "strtab size (bytes)    =" << strtab.size()
-                  << std::hex << " " << strtab.size();
-        LOG(INFO) << "symtab size (elements) =" << symtab_builder_.GetSize()
-                  << std::hex << " " << symtab_builder_.GetSize();
+      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
+      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
+      if (eh_frame_hdr != nullptr) {
+        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
+        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
+        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
+                 eh_frame_hdr->GetHeader()->sh_offset);
+        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
+        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
+        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
+        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
       }
     }
+    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
 
-    // Setup all the other sections.
-    for (auto* builder : other_builders_) {
-      section_ptrs_.push_back(builder->GetSection());
-      AssignSectionStr(builder, &shstrtab_);
-      builder->SetSectionIndex(section_index_);
-      section_index_++;
-    }
+    // Create the main ELF header.
+    Elf_Ehdr elf_header = MakeElfHeader(isa_);
+    elf_header.e_phoff = kProgramHeadersOffset;
+    elf_header.e_shoff = section_headers_offset;
+    elf_header.e_phnum = program_headers.size();
+    elf_header.e_shnum = section_headers.size();
+    elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
 
-    // Setup shstrtab
-    section_ptrs_.push_back(shstrtab_builder_.GetSection());
-    AssignSectionStr(&shstrtab_builder_, &shstrtab_);
-    shstrtab_builder_.SetSectionIndex(section_index_);
-    section_index_++;
-
-    if (debug_logging_) {
-      LOG(INFO) << ".shstrtab size    (bytes)   =" << shstrtab_.size()
-                << std::hex << " " << shstrtab_.size();
-      LOG(INFO) << "section list size (elements)=" << section_ptrs_.size()
-                << std::hex << " " << section_ptrs_.size();
-    }
-
-    if (IncludingDebugSymbols()) {
-      // Get the layout of the symtab section.
-      symtab_builder_.GetSection()->sh_offset =
-          NextOffset<Elf_Word, Elf_Shdr>(*symtab_builder_.GetSection(),
-                                         *dynamic_builder_.GetSection());
-      symtab_builder_.GetSection()->sh_addr = 0;
-      // Add to leave space for the null symbol.
-      symtab_builder_.GetSection()->sh_size = symtab_builder_.GetSize() * sizeof(Elf_Sym);
-      symtab_builder_.GetSection()->sh_link = symtab_builder_.GetLink();
-
-      // Get the layout of the dynstr section.
-      symtab_builder_.GetStrTab()->GetSection()->sh_offset =
-          NextOffset<Elf_Word, Elf_Shdr>(*symtab_builder_.GetStrTab()->GetSection(),
-                                         *symtab_builder_.GetSection());
-      symtab_builder_.GetStrTab()->GetSection()->sh_addr = 0;
-      symtab_builder_.GetStrTab()->GetSection()->sh_size = strtab.size();
-      symtab_builder_.GetStrTab()->GetSection()->sh_link = symtab_builder_.GetStrTab()->GetLink();
-
-      prev = symtab_builder_.GetStrTab()->GetSection();
-      if (debug_logging_) {
-        LOG(INFO) << "symtab off=" << symtab_builder_.GetSection()->sh_offset
-                  << " symtab size=" << symtab_builder_.GetSection()->sh_size;
-        LOG(INFO) << "strtab off=" << symtab_builder_.GetStrTab()->GetSection()->sh_offset
-                  << " strtab size=" << symtab_builder_.GetStrTab()->GetSection()->sh_size;
-      }
-    }
-
-    // Get the layout of the extra sections without SHF_ALLOC flag.
-    // (This will deal with the debug sections if they are there)
-    for (auto* it : other_builders_) {
-      if ((it->GetSection()->sh_flags & SHF_ALLOC) == 0) {
-        it->GetSection()->sh_offset = NextOffset<Elf_Word, Elf_Shdr>(*it->GetSection(), *prev);
-        it->GetSection()->sh_addr = 0;
-        it->GetSection()->sh_size = it->GetBuffer()->size();
-        it->GetSection()->sh_link = it->GetLink();
-
-        // We postpone adding an ElfFilePiece to keep the order in "pieces."
-
-        prev = it->GetSection();
-        if (debug_logging_) {
-          LOG(INFO) << it->GetName() << " off=" << it->GetSection()->sh_offset
-                    << " size=" << it->GetSection()->sh_size;
-        }
-      }
-    }
-
-    // Get the layout of the shstrtab section
-    shstrtab_builder_.GetSection()->sh_offset =
-        NextOffset<Elf_Word, Elf_Shdr>(*shstrtab_builder_.GetSection(), *prev);
-    shstrtab_builder_.GetSection()->sh_addr = 0;
-    shstrtab_builder_.GetSection()->sh_size = shstrtab_.size();
-    shstrtab_builder_.GetSection()->sh_link = shstrtab_builder_.GetLink();
-    if (debug_logging_) {
-        LOG(INFO) << "shstrtab off=" << shstrtab_builder_.GetSection()->sh_offset
-                  << " shstrtab size=" << shstrtab_builder_.GetSection()->sh_size;
-    }
-
-    // The section list comes after come after.
-    Elf_Word sections_offset = RoundUp(
-        shstrtab_builder_.GetSection()->sh_offset + shstrtab_builder_.GetSection()->sh_size,
-        sizeof(Elf_Word));
-
-    // Setup the actual symbol arrays.
-    std::vector<Elf_Sym> dynsym = dynsym_builder_.GenerateSymtab();
-    CHECK_EQ(dynsym.size() * sizeof(Elf_Sym), dynsym_builder_.GetSection()->sh_size);
-    std::vector<Elf_Sym> symtab;
-    if (IncludingDebugSymbols()) {
-      symtab = symtab_builder_.GenerateSymtab();
-      CHECK_EQ(symtab.size() * sizeof(Elf_Sym), symtab_builder_.GetSection()->sh_size);
-    }
-
-    // Setup the dynamic section.
-    // This will add the 2 values we cannot know until now time, namely the size
-    // and the soname_offset.
-    std::vector<Elf_Dyn> dynamic = dynamic_builder_.GetDynamics(dynstr_.size(),
-                                                                  dynstr_soname_offset_);
-    CHECK_EQ(dynamic.size() * sizeof(Elf_Dyn), dynamic_builder_.GetSection()->sh_size);
-
-    // Finish setup of the program headers now that we know the layout of the
-    // whole file.
-    Elf_Word load_r_size =
-        rodata_builder_.GetSection()->sh_offset + rodata_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_R__].p_filesz = load_r_size;
-    program_headers_[PH_LOAD_R__].p_memsz =  load_r_size;
-    program_headers_[PH_LOAD_R__].p_align =  rodata_builder_.GetSection()->sh_addralign;
-
-    Elf_Word load_rx_size = text_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_R_X].p_offset = text_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_R_X].p_vaddr  = text_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_R_X].p_paddr  = text_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_R_X].p_filesz = load_rx_size;
-    program_headers_[PH_LOAD_R_X].p_memsz  = load_rx_size;
-    program_headers_[PH_LOAD_R_X].p_align  = text_builder_.GetSection()->sh_addralign;
-
-    program_headers_[PH_LOAD_RW_BSS].p_offset = bss_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_BSS].p_vaddr  = bss_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_BSS].p_paddr  = bss_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_BSS].p_filesz = 0;
-    program_headers_[PH_LOAD_RW_BSS].p_memsz  = bss_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_BSS].p_align  = bss_builder_.GetSection()->sh_addralign;
-
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_memsz  = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_LOAD_RW_DYNAMIC].p_align  = dynamic_builder_.GetSection()->sh_addralign;
-
-    program_headers_[PH_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset;
-    program_headers_[PH_DYNAMIC].p_vaddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_DYNAMIC].p_paddr  = dynamic_builder_.GetSection()->sh_addr;
-    program_headers_[PH_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_DYNAMIC].p_memsz  = dynamic_builder_.GetSection()->sh_size;
-    program_headers_[PH_DYNAMIC].p_align  = dynamic_builder_.GetSection()->sh_addralign;
-
-    const auto* eh_frame_hdr = FindRawSection(".eh_frame_hdr");
-    if (eh_frame_hdr != nullptr) {
-      const auto* eh_frame = FindRawSection(".eh_frame");
-      // Check layout:
-      // 1) eh_frame is before eh_frame_hdr.
-      // 2) There's no gap.
-      CHECK(eh_frame != nullptr);
-      CHECK_LE(eh_frame->GetSection()->sh_offset, eh_frame_hdr->GetSection()->sh_offset);
-      CHECK_EQ(eh_frame->GetSection()->sh_offset + eh_frame->GetSection()->sh_size,
-               eh_frame_hdr->GetSection()->sh_offset);
-
-      program_headers_[PH_EH_FRAME_HDR].p_type   = PT_GNU_EH_FRAME;
-      program_headers_[PH_EH_FRAME_HDR].p_offset = eh_frame_hdr->GetSection()->sh_offset;
-      program_headers_[PH_EH_FRAME_HDR].p_vaddr  = eh_frame_hdr->GetSection()->sh_addr;
-      program_headers_[PH_EH_FRAME_HDR].p_paddr  = eh_frame_hdr->GetSection()->sh_addr;
-      program_headers_[PH_EH_FRAME_HDR].p_filesz = eh_frame_hdr->GetSection()->sh_size;
-      program_headers_[PH_EH_FRAME_HDR].p_memsz  = eh_frame_hdr->GetSection()->sh_size;
-      program_headers_[PH_EH_FRAME_HDR].p_align  = eh_frame_hdr->GetSection()->sh_addralign;
-    }
-
-    // Finish setup of the Ehdr values.
-    elf_header_.e_phoff = PHDR_OFFSET;
-    elf_header_.e_shoff = sections_offset;
-    elf_header_.e_phnum = (bss_builder_.GetSection()->sh_size != 0u) ? PH_NUM : PH_NUM - 1;
-    elf_header_.e_shnum = section_ptrs_.size();
-    elf_header_.e_shstrndx = shstrtab_builder_.GetSectionIndex();
-
-    // Add the rest of the pieces to the list.
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_,
-                                                      sizeof(elf_header_)));
-    if (bss_builder_.GetSection()->sh_size != 0u) {
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
-                                                        &program_headers_[0],
-                                                        elf_header_.e_phnum * sizeof(Elf_Phdr)));
-    } else {
-      // Skip PH_LOAD_RW_BSS.
-      Elf_Word part1_size = PH_LOAD_RW_BSS * sizeof(Elf_Phdr);
-      Elf_Word part2_size = (PH_NUM - PH_LOAD_RW_BSS - 1) * sizeof(Elf_Phdr);
-      CHECK_EQ(part1_size + part2_size, elf_header_.e_phnum * sizeof(Elf_Phdr));
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET,
-                                                        &program_headers_[0], part1_size));
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers part 2",
-                                                        PHDR_OFFSET + part1_size,
-                                                        &program_headers_[PH_LOAD_RW_BSS + 1],
-                                                        part2_size));
-    }
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic",
-                                                      dynamic_builder_.GetSection()->sh_offset,
-                                                      dynamic.data(),
-                                                      dynamic_builder_.GetSection()->sh_size));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynsym", dynsym_builder_.GetSection()->sh_offset,
-                                                      dynsym.data(),
-                                                      dynsym.size() * sizeof(Elf_Sym)));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynstr",
-                                                    dynsym_builder_.GetStrTab()->GetSection()->sh_offset,
-                                                    dynstr_.c_str(), dynstr_.size()));
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".hash", hash_builder_.GetSection()->sh_offset,
-                                                      hash_.data(),
-                                                      hash_.size() * sizeof(Elf_Word)));
-    pieces.push_back(new ElfFileRodataPiece<Elf_Word>(rodata_builder_.GetSection()->sh_offset,
-                                                      oat_writer_));
-    pieces.push_back(new ElfFileOatTextPiece<Elf_Word>(text_builder_.GetSection()->sh_offset,
-                                                       oat_writer_));
-    if (IncludingDebugSymbols()) {
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".symtab",
-                                                        symtab_builder_.GetSection()->sh_offset,
-                                                        symtab.data(),
-                                                        symtab.size() * sizeof(Elf_Sym)));
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".strtab",
-                                                    symtab_builder_.GetStrTab()->GetSection()->sh_offset,
-                                                    strtab.c_str(), strtab.size()));
-    }
-    pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".shstrtab",
-                                                      shstrtab_builder_.GetSection()->sh_offset,
-                                                      &shstrtab_[0], shstrtab_.size()));
-    for (uint32_t i = 0; i < section_ptrs_.size(); ++i) {
-      // Just add all the sections in induvidually since they are all over the
-      // place on the heap/stack.
-      Elf_Word cur_off = sections_offset + i * sizeof(Elf_Shdr);
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("section table piece", cur_off,
-                                                        section_ptrs_[i], sizeof(Elf_Shdr)));
-    }
-
-    // Postponed debug info.
-    for (auto* it : other_builders_) {
-      pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(it->GetName(), it->GetSection()->sh_offset,
-                                                        it->GetBuffer()->data(),
-                                                        it->GetBuffer()->size()));
-    }
-
-    if (!WriteOutFile(pieces)) {
-      LOG(ERROR) << "Unable to write to file " << elf_file_->GetPath();
-
-      STLDeleteElements(&pieces);  // Have to manually clean pieces.
+    // Write all headers and section content to the file.
+    // Depending on the implementations of Section::Write, this
+    // might be just memory copies or some more elaborate operations.
+    if (!WriteArray(elf_file, &elf_header, 1)) {
+      LOG(INFO) << "Failed to write the ELF header";
       return false;
     }
-
-    STLDeleteElements(&pieces);  // Have to manually clean pieces.
+    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
+      LOG(INFO) << "Failed to write the program headers";
+      return false;
+    }
+    for (Section* section : sections) {
+      const Elf_Shdr* header = section->GetHeader();
+      if (header->sh_type != SHT_NOBITS) {
+        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
+          LOG(INFO) << "Failed to write section " << section->GetName();
+          return false;
+        }
+        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
+        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
+          << "The number of bytes written does not match GetSize()";
+      }
+    }
+    if (!SeekTo(elf_file, section_headers_offset) ||
+        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
+      LOG(INFO) << "Failed to write the section headers";
+      return false;
+    }
     return true;
   }
 
-  // Adds the given raw section to the builder.  It does not take ownership.
-  void RegisterRawSection(ElfRawSectionBuilder<ElfTypes>* bld) {
-    other_builders_.push_back(bld);
+  // Adds the given section to the builder.  It does not take ownership.
+  void RegisterSection(Section* section) {
+    other_sections_.push_back(section);
   }
 
-  const ElfRawSectionBuilder<ElfTypes>* FindRawSection(const char* name) {
-    for (const auto* other_builder : other_builders_) {
-      if (other_builder->GetName() == name) {
-        return other_builder;
+  const Section* FindSection(const char* name) {
+    for (const auto* section : other_sections_) {
+      if (section->GetName() == name) {
+        return section;
       }
     }
     return nullptr;
   }
 
  private:
-  void SetISA(InstructionSet isa) {
+  static bool SeekTo(File* elf_file, Elf_Word offset) {
+    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
+      << "Seeking backwards";
+    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
+      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
+      return false;
+    }
+    return true;
+  }
+
+  template<typename T>
+  static bool WriteArray(File* elf_file, const T* data, size_t count) {
+    DCHECK(data != nullptr);
+    if (!elf_file->WriteFully(data, count * sizeof(T))) {
+      PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
+      return false;
+    }
+    return true;
+  }
+
+  // Helper - create segment header based on memory range.
+  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
+                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
+    Elf_Phdr phdr = Elf_Phdr();
+    phdr.p_type    = type;
+    phdr.p_flags   = flags;
+    phdr.p_offset  = offset;
+    phdr.p_vaddr   = offset;
+    phdr.p_paddr   = offset;
+    phdr.p_filesz  = size;
+    phdr.p_memsz   = size;
+    phdr.p_align   = align;
+    return phdr;
+  }
+
+  // Helper - create segment header based on section header.
+  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
+                                    const Section& section) {
+    const Elf_Shdr* shdr = section.GetHeader();
+    // Only run-time allocated sections should be in segment headers.
+    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
+    Elf_Phdr phdr = Elf_Phdr();
+    phdr.p_type   = type;
+    phdr.p_flags  = flags;
+    phdr.p_offset = shdr->sh_offset;
+    phdr.p_vaddr  = shdr->sh_addr;
+    phdr.p_paddr  = shdr->sh_addr;
+    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
+    phdr.p_memsz  = shdr->sh_size;
+    phdr.p_align  = shdr->sh_addralign;
+    return phdr;
+  }
+
+  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
+    Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
       case kArm:
         // Fall through.
       case kThumb2: {
-        elf_header_.e_machine = EM_ARM;
-        elf_header_.e_flags = EF_ARM_EABI_VER5;
+        elf_header.e_machine = EM_ARM;
+        elf_header.e_flags = EF_ARM_EABI_VER5;
         break;
       }
       case kArm64: {
-        elf_header_.e_machine = EM_AARCH64;
-        elf_header_.e_flags = 0;
+        elf_header.e_machine = EM_AARCH64;
+        elf_header.e_flags = 0;
         break;
       }
       case kX86: {
-        elf_header_.e_machine = EM_386;
-        elf_header_.e_flags = 0;
+        elf_header.e_machine = EM_386;
+        elf_header.e_flags = 0;
         break;
       }
       case kX86_64: {
-        elf_header_.e_machine = EM_X86_64;
-        elf_header_.e_flags = 0;
+        elf_header.e_machine = EM_X86_64;
+        elf_header.e_flags = 0;
         break;
       }
       case kMips: {
-        elf_header_.e_machine = EM_MIPS;
-        elf_header_.e_flags = (EF_MIPS_NOREORDER |
+        elf_header.e_machine = EM_MIPS;
+        elf_header.e_flags = (EF_MIPS_NOREORDER |
                                EF_MIPS_PIC       |
                                EF_MIPS_CPIC      |
                                EF_MIPS_ABI_O32   |
@@ -1250,147 +869,82 @@
         break;
       }
       case kMips64: {
-        elf_header_.e_machine = EM_MIPS;
-        elf_header_.e_flags = (EF_MIPS_NOREORDER |
+        elf_header.e_machine = EM_MIPS;
+        elf_header.e_flags = (EF_MIPS_NOREORDER |
                                EF_MIPS_PIC       |
                                EF_MIPS_CPIC      |
                                EF_MIPS_ARCH_64R6);
         break;
       }
-      default: {
-        fatal_error_ = true;
-        LOG(FATAL) << "Unknown instruction set: " << isa;
-        break;
+      case kNone: {
+        LOG(FATAL) << "No instruction set";
       }
     }
-  }
 
-  void SetupEhdr() {
-    memset(&elf_header_, 0, sizeof(elf_header_));
-    elf_header_.e_ident[EI_MAG0]       = ELFMAG0;
-    elf_header_.e_ident[EI_MAG1]       = ELFMAG1;
-    elf_header_.e_ident[EI_MAG2]       = ELFMAG2;
-    elf_header_.e_ident[EI_MAG3]       = ELFMAG3;
-    elf_header_.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
+    elf_header.e_ident[EI_MAG0]       = ELFMAG0;
+    elf_header.e_ident[EI_MAG1]       = ELFMAG1;
+    elf_header.e_ident[EI_MAG2]       = ELFMAG2;
+    elf_header.e_ident[EI_MAG3]       = ELFMAG3;
+    elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
                                          ? ELFCLASS32 : ELFCLASS64;;
-    elf_header_.e_ident[EI_DATA]       = ELFDATA2LSB;
-    elf_header_.e_ident[EI_VERSION]    = EV_CURRENT;
-    elf_header_.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
-    elf_header_.e_ident[EI_ABIVERSION] = 0;
-    elf_header_.e_type = ET_DYN;
-    elf_header_.e_version = 1;
-    elf_header_.e_entry = 0;
-    elf_header_.e_ehsize = sizeof(Elf_Ehdr);
-    elf_header_.e_phentsize = sizeof(Elf_Phdr);
-    elf_header_.e_shentsize = sizeof(Elf_Shdr);
-    elf_header_.e_phoff = sizeof(Elf_Ehdr);
+    elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
+    elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
+    elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
+    elf_header.e_ident[EI_ABIVERSION] = 0;
+    elf_header.e_type = ET_DYN;
+    elf_header.e_version = 1;
+    elf_header.e_entry = 0;
+    elf_header.e_ehsize = sizeof(Elf_Ehdr);
+    elf_header.e_phentsize = sizeof(Elf_Phdr);
+    elf_header.e_shentsize = sizeof(Elf_Shdr);
+    elf_header.e_phoff = sizeof(Elf_Ehdr);
+    return elf_header;
   }
 
-  // Sets up a bunch of the required Dynamic Section entries.
-  // Namely it will initialize all the mandatory ones that it can.
-  // Specifically:
-  // DT_HASH
-  // DT_STRTAB
-  // DT_SYMTAB
-  // DT_SYMENT
-  //
-  // Some such as DT_SONAME, DT_STRSZ and DT_NULL will be put in later.
-  void SetupDynamic() {
-    dynamic_builder_.AddDynamicTag(DT_HASH, 0, &hash_builder_);
-    dynamic_builder_.AddDynamicTag(DT_STRTAB, 0, dynsym_builder_.GetStrTab());
-    dynamic_builder_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_builder_);
-    dynamic_builder_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym));
+  void BuildDynamicSection(const std::string& elf_file_path) {
+    std::string soname(elf_file_path);
+    size_t directory_separator_pos = soname.rfind('/');
+    if (directory_separator_pos != std::string::npos) {
+      soname = soname.substr(directory_separator_pos + 1);
+    }
+    // NB: We must add the name before adding DT_STRSZ.
+    Elf_Word soname_offset = dynstr_.AddName(soname);
+
+    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
+    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
+    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
+    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
+    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
+    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
   }
 
-  // Sets up the basic dynamic symbols that are needed, namely all those we
-  // can know already.
-  //
-  // Specifically adds:
-  // oatdata
-  // oatexec
-  // oatlastword
-  void SetupRequiredSymbols() {
-    dynsym_builder_.AddSymbol("oatdata", &rodata_builder_, 0, true,
-                              rodata_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_builder_.AddSymbol("oatexec", &text_builder_, 0, true,
-                              text_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_builder_.AddSymbol("oatlastword", &text_builder_, text_builder_.GetSize() - 4,
-                              true, 4, STB_GLOBAL, STT_OBJECT);
-    if (bss_builder_.GetSize() != 0u) {
-      dynsym_builder_.AddSymbol("oatbss", &bss_builder_, 0, true,
-                                bss_builder_.GetSize(), STB_GLOBAL, STT_OBJECT);
-      dynsym_builder_.AddSymbol("oatbsslastword", &bss_builder_, bss_builder_.GetSize() - 4,
-                                true, 4, STB_GLOBAL, STT_OBJECT);
+  void BuildDynsymSection() {
+    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
+                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    dynsym_.AddSymbol("oatexec", &text_, 0, true,
+                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
+                      true, 4, STB_GLOBAL, STT_OBJECT);
+    if (bss_.GetSize() != 0u) {
+      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
+                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
+                        true, 4, STB_GLOBAL, STT_OBJECT);
     }
   }
 
-  void AssignSectionStr(ElfSectionBuilder<ElfTypes>* builder, std::string* strtab) {
-    builder->GetSection()->sh_name = strtab->size();
-    *strtab += builder->GetName();
-    *strtab += '\0';
-    if (debug_logging_) {
-      LOG(INFO) << "adding section name \"" << builder->GetName() << "\" "
-                << "to shstrtab at offset " << builder->GetSection()->sh_name;
-    }
-  }
-
-
-  // Write each of the pieces out to the file.
-  bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces) {
-    for (auto it = pieces.begin(); it != pieces.end(); ++it) {
-      if (!(*it)->Write(elf_file_)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  bool IncludingDebugSymbols() const {
-    return add_symbols_ && symtab_builder_.GetSize() > 1;
-  }
-
-  CodeOutput* const oat_writer_;
-  File* const elf_file_;
-  const bool add_symbols_;
-  const bool debug_logging_;
-
-  bool fatal_error_ = false;
-
-  // What phdr is.
-  static const uint32_t PHDR_OFFSET = sizeof(Elf_Ehdr);
-  enum : uint8_t {
-    PH_PHDR             = 0,
-    PH_LOAD_R__         = 1,
-    PH_LOAD_R_X         = 2,
-    PH_LOAD_RW_BSS      = 3,
-    PH_LOAD_RW_DYNAMIC  = 4,
-    PH_DYNAMIC          = 5,
-    PH_EH_FRAME_HDR     = 6,
-    PH_NUM              = 7,
-  };
-  static const uint32_t PHDR_SIZE = sizeof(Elf_Phdr) * PH_NUM;
-  Elf_Phdr program_headers_[PH_NUM];
-
-  Elf_Ehdr elf_header_;
-
-  Elf_Shdr null_hdr_;
-  std::string shstrtab_;
-  // The index of the current section being built. The first being 1.
-  uint32_t section_index_;
-  std::string dynstr_;
-  uint32_t dynstr_soname_offset_;
-  std::vector<const Elf_Shdr*> section_ptrs_;
-  std::vector<Elf_Word> hash_;
-
-  ElfOatSectionBuilder<ElfTypes> text_builder_;
-  ElfOatSectionBuilder<ElfTypes> rodata_builder_;
-  ElfOatSectionBuilder<ElfTypes> bss_builder_;
-  ElfSymtabBuilder<ElfTypes> dynsym_builder_;
-  ElfSymtabBuilder<ElfTypes> symtab_builder_;
-  ElfSectionBuilder<ElfTypes> hash_builder_;
-  ElfDynamicBuilder<ElfTypes> dynamic_builder_;
-  ElfSectionBuilder<ElfTypes> shstrtab_builder_;
-  std::vector<ElfRawSectionBuilder<ElfTypes>*> other_builders_;
+  InstructionSet isa_;
+  StrtabSection dynstr_;
+  SymtabSection dynsym_;
+  HashSection hash_;
+  OatSection rodata_;
+  OatSection text_;
+  NoBitsSection bss_;
+  DynamicSection dynamic_;
+  StrtabSection strtab_;
+  SymtabSection symtab_;
+  std::vector<Section*> other_sections_;
+  StrtabSection shstrtab_;
 
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
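For reference, a minimal stand-alone sketch of the segment-header construction that MakeProgramHeader performs above, using the generic ELF types from <elf.h>. The helper name and the section values here are illustrative only, not ART code; the point is that a SHT_NOBITS section such as .bss contributes p_memsz but no p_filesz.

#include <elf.h>
#include <cstdio>

// Sketch of the MakeProgramHeader idea: map a section 1:1 into a PT_LOAD
// segment. SHT_NOBITS sections occupy memory but no bytes in the file.
static Elf32_Phdr MakeLoadSegment(const Elf32_Shdr& shdr) {
  Elf32_Phdr phdr = Elf32_Phdr();
  phdr.p_type   = PT_LOAD;
  phdr.p_flags  = PF_R;  // Assumed read-only; the builder picks flags per section.
  phdr.p_offset = shdr.sh_offset;
  phdr.p_vaddr  = shdr.sh_addr;
  phdr.p_paddr  = shdr.sh_addr;
  phdr.p_filesz = shdr.sh_type != SHT_NOBITS ? shdr.sh_size : 0u;
  phdr.p_memsz  = shdr.sh_size;
  phdr.p_align  = shdr.sh_addralign;
  return phdr;
}

int main() {
  Elf32_Shdr bss = Elf32_Shdr();  // Hypothetical .bss section header.
  bss.sh_type = SHT_NOBITS;
  bss.sh_addr = 0x5000;
  bss.sh_offset = 0x5000;
  bss.sh_size = 0x1000;
  bss.sh_addralign = 0x1000;
  Elf32_Phdr phdr = MakeLoadSegment(bss);
  std::printf("filesz=%u memsz=%u\n", phdr.p_filesz, phdr.p_memsz);  // filesz=0 memsz=4096
  return 0;
}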
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 3b2ca94..178aa03 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,7 +21,6 @@
 
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
@@ -30,7 +29,6 @@
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
-#include "file_output_stream.h"
 #include "globals.h"
 #include "leb128.h"
 #include "oat.h"
@@ -50,20 +48,6 @@
   return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
 }
 
-class OatWriterWrapper FINAL : public CodeOutput {
- public:
-  explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
-
-  void SetCodeOffset(size_t offset) {
-    oat_writer_->SetOatDataOffset(offset);
-  }
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->Write(out);
-  }
- private:
-  OatWriter* const oat_writer_;
-};
-
 template <typename ElfTypes>
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
 
@@ -99,18 +83,29 @@
   buffer->push_back(0);  // End of sections.
 }
 
-template<typename AddressType, bool SubtractPatchLocation = false>
-static void PatchAddresses(const std::vector<uintptr_t>* patch_locations,
-                           AddressType delta, std::vector<uint8_t>* buffer) {
-  // Addresses in .debug_* sections are unaligned.
-  typedef __attribute__((__aligned__(1))) AddressType UnalignedAddressType;
-  if (patch_locations != nullptr) {
-    for (uintptr_t patch_location : *patch_locations) {
-      *reinterpret_cast<UnalignedAddressType*>(buffer->data() + patch_location) +=
-          delta - (SubtractPatchLocation ? patch_location : 0);
-    }
+class RodataWriter FINAL : public CodeOutput {
+ public:
+  explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+
+  bool Write(OutputStream* out) OVERRIDE {
+    return oat_writer_->WriteRodata(out);
   }
-}
+
+ private:
+  OatWriter* oat_writer_;
+};
+
+class TextWriter FINAL : public CodeOutput {
+ public:
+  explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+
+  bool Write(OutputStream* out) OVERRIDE {
+    return oat_writer_->WriteCode(out);
+  }
+
+ private:
+  OatWriter* oat_writer_;
+};
 
 template <typename ElfTypes>
 bool ElfWriterQuick<ElfTypes>::Write(
@@ -118,106 +113,81 @@
     const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
     const std::string& android_root_unused ATTRIBUTE_UNUSED,
     bool is_host_unused ATTRIBUTE_UNUSED) {
-  constexpr bool debug = false;
-  const OatHeader& oat_header = oat_writer->GetOatHeader();
-  typename ElfTypes::Word oat_data_size = oat_header.GetExecutableOffset();
-  uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
-  uint32_t oat_bss_size = oat_writer->GetBssSize();
+  const InstructionSet isa = compiler_driver_->GetInstructionSet();
 
-  OatWriterWrapper wrapper(oat_writer);
-
+  // Setup the builder with the main OAT sections (.rodata .text .bss).
+  const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+  const size_t text_size = oat_writer->GetSize() - rodata_size;
+  const size_t bss_size = oat_writer->GetBssSize();
+  RodataWriter rodata_writer(oat_writer);
+  TextWriter text_writer(oat_writer);
   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(
-      &wrapper,
-      elf_file_,
-      compiler_driver_->GetInstructionSet(),
-      0,
-      oat_data_size,
-      oat_data_size,
-      oat_exec_size,
-      RoundUp(oat_data_size + oat_exec_size, kPageSize),
-      oat_bss_size,
-      compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(),
-      debug));
+      isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size));
 
-  InstructionSet isa = compiler_driver_->GetInstructionSet();
-  int alignment = GetInstructionSetPointerSize(isa);
-  typedef ElfRawSectionBuilder<ElfTypes> RawSection;
-  RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, alignment, 0);
-  RawSection eh_frame_hdr(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
-  RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-  RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-  RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-  RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+  // Add debug sections.
+  // They are stack-allocated here (in the same scope as the builder),
+  // but they are registered with the builder only if they are used.
+  using RawSection = typename ElfBuilder<ElfTypes>::RawSection;
+  const auto* text = builder->GetText();
+  constexpr bool absolute = false;  // Patch addresses to be absolute.
+  constexpr bool relative = true;  // Patch addresses to be relative.
+  const bool is64bit = Is64BitInstructionSet(isa);
+  RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC,
+                      nullptr, 0, kPageSize, 0, text, relative, is64bit);
+  RawSection eh_frame_hdr(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC,
+                          nullptr, 0, 4, 0);
+  RawSection debug_info(".debug_info", SHT_PROGBITS, 0,
+                        nullptr, 0, 1, 0, text, absolute, false /* 32bit */);
+  RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0,
+                          nullptr, 0, 1, 0);
+  RawSection debug_str(".debug_str", SHT_PROGBITS, 0,
+                       nullptr, 0, 1, 0);
+  RawSection debug_line(".debug_line", SHT_PROGBITS, 0,
+                        nullptr, 0, 1, 0, text, absolute, false /* 32bit */);
+  if (!oat_writer->GetMethodDebugInfo().empty()) {
+    if (compiler_driver_->GetCompilerOptions().GetIncludeCFI()) {
+      dwarf::WriteEhFrame(
+          compiler_driver_, oat_writer, dwarf::DW_EH_PE_pcrel,
+          eh_frame.GetBuffer(), eh_frame.GetPatchLocations(),
+          eh_frame_hdr.GetBuffer());
+      builder->RegisterSection(&eh_frame);
+      builder->RegisterSection(&eh_frame_hdr);
+    }
+    if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+      // Add methods to .symtab.
+      WriteDebugSymbols(builder.get(), oat_writer);
+      // Generate DWARF .debug_* sections.
+      dwarf::WriteDebugSections(
+          compiler_driver_, oat_writer,
+          debug_info.GetBuffer(), debug_info.GetPatchLocations(),
+          debug_abbrev.GetBuffer(),
+          debug_str.GetBuffer(),
+          debug_line.GetBuffer(), debug_line.GetPatchLocations());
+      builder->RegisterSection(&debug_info);
+      builder->RegisterSection(&debug_abbrev);
+      builder->RegisterSection(&debug_str);
+      builder->RegisterSection(&debug_line);
+      *oat_writer->GetAbsolutePatchLocationsFor(".debug_info") =
+          *debug_info.GetPatchLocations();
+      *oat_writer->GetAbsolutePatchLocationsFor(".debug_line") =
+          *debug_line.GetPatchLocations();
+    }
+  }
+
+  // Add relocation section.
   RawSection oat_patches(".oat_patches", SHT_OAT_PATCH, 0, nullptr, 0, 1, 0);
-
-  // Do not add to .oat_patches since we will make the addresses relative.
-  std::vector<uintptr_t> eh_frame_patches;
-  if (compiler_driver_->GetCompilerOptions().GetIncludeCFI() &&
-      !oat_writer->GetMethodDebugInfo().empty()) {
-    dwarf::WriteEhFrame(compiler_driver_, oat_writer,
-                        dwarf::DW_EH_PE_pcrel,
-                        eh_frame.GetBuffer(), &eh_frame_patches,
-                        eh_frame_hdr.GetBuffer());
-    builder->RegisterRawSection(&eh_frame);
-    builder->RegisterRawSection(&eh_frame_hdr);
-  }
-
-  // Must be done after .eh_frame is created since it is used in the Elf layout.
-  if (!builder->Init()) {
-    return false;
-  }
-
-  std::vector<uintptr_t>* debug_info_patches = nullptr;
-  std::vector<uintptr_t>* debug_line_patches = nullptr;
-  if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() &&
-      !oat_writer->GetMethodDebugInfo().empty()) {
-    // Add methods to .symtab.
-    WriteDebugSymbols(builder.get(), oat_writer);
-    // Generate DWARF .debug_* sections.
-    debug_info_patches = oat_writer->GetAbsolutePatchLocationsFor(".debug_info");
-    debug_line_patches = oat_writer->GetAbsolutePatchLocationsFor(".debug_line");
-    dwarf::WriteDebugSections(compiler_driver_, oat_writer,
-                              debug_info.GetBuffer(), debug_info_patches,
-                              debug_abbrev.GetBuffer(),
-                              debug_str.GetBuffer(),
-                              debug_line.GetBuffer(), debug_line_patches);
-    builder->RegisterRawSection(&debug_info);
-    builder->RegisterRawSection(&debug_abbrev);
-    builder->RegisterRawSection(&debug_str);
-    builder->RegisterRawSection(&debug_line);
-  }
-
   if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation() ||
       // ElfWriter::Fixup will be called regardless and it needs to be able
       // to patch debug sections so we have to include patches for them.
       compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
     EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(), oat_patches.GetBuffer());
-    builder->RegisterRawSection(&oat_patches);
+    builder->RegisterSection(&oat_patches);
   }
 
-  // We know where .text and .eh_frame will be located, so patch the addresses.
-  typename ElfTypes::Addr text_addr = builder->GetTextBuilder().GetSection()->sh_addr;
-  // TODO: Simplify once we use Elf64 - we can use ElfTypes::Addr instead of branching.
-  if (Is64BitInstructionSet(compiler_driver_->GetInstructionSet())) {
-    // relative_address = (text_addr + address) - (eh_frame_addr + patch_location);
-    PatchAddresses<uint64_t, true>(&eh_frame_patches,
-        text_addr - eh_frame.GetSection()->sh_addr, eh_frame.GetBuffer());
-    PatchAddresses<uint64_t>(debug_info_patches, text_addr, debug_info.GetBuffer());
-    PatchAddresses<uint64_t>(debug_line_patches, text_addr, debug_line.GetBuffer());
-  } else {
-    // relative_address = (text_addr + address) - (eh_frame_addr + patch_location);
-    PatchAddresses<uint32_t, true>(&eh_frame_patches,
-        text_addr - eh_frame.GetSection()->sh_addr, eh_frame.GetBuffer());
-    PatchAddresses<uint32_t>(debug_info_patches, text_addr, debug_info.GetBuffer());
-    PatchAddresses<uint32_t>(debug_line_patches, text_addr, debug_line.GetBuffer());
-  }
-
-  return builder->Write();
+  return builder->Write(elf_file_);
 }
 
 template <typename ElfTypes>
-// Do not inline to avoid Clang stack frame problems. b/18738594
-NO_INLINE
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
   const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
 
@@ -230,7 +200,7 @@
     }
   }
 
-  ElfSymtabBuilder<ElfTypes>* symtab = builder->GetSymtabBuilder();
+  auto* symtab = builder->GetSymtab();
   for (auto it = method_info.begin(); it != method_info.end(); ++it) {
     std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
     if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
@@ -240,13 +210,13 @@
     uint32_t low_pc = it->low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc,
+    symtab->AddSymbol(name, builder->GetText(), low_pc,
                       true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     if (it->compiled_method_->GetInstructionSet() == kThumb2) {
-      symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true,
+      symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true,
                         0, STB_LOCAL, STT_NOTYPE);
     }
   }
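The RawSection constructor arguments introduced above (the text section plus the relative/absolute and is64bit flags) move the address fix-ups that the deleted PatchAddresses helper used to apply into the section itself. A small self-contained sketch of that arithmetic, with made-up addresses and assuming the 32-bit case:

#include <cassert>
#include <cstdint>

int main() {
  // Assumed layout for illustration only.
  const uint32_t text_addr = 0x3000;       // sh_addr of .text
  const uint32_t eh_frame_addr = 0x1000;   // sh_addr of .eh_frame
  const uint32_t patch_location = 0x10;    // offset of the slot inside .eh_frame
  const uint32_t stored_value = 0x40;      // method offset relative to the start of .text

  // Absolute patching (.debug_info/.debug_line): add the .text load address.
  const uint32_t absolute = text_addr + stored_value;
  assert(absolute == 0x3040u);

  // Relative patching (.eh_frame with DW_EH_PE_pcrel): make the value relative
  // to the patched slot, i.e. (text_addr + value) - (eh_frame_addr + patch_location).
  const uint32_t relative = (text_addr + stored_value) - (eh_frame_addr + patch_location);
  assert(relative == 0x2030u);
  return 0;
}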
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index fc70d8f..4dc7509 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -89,7 +89,12 @@
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
-    ProcessStrings();
+
+    // Calling this can in theory fill in some resolved strings. However, in practice it seems to
+    // never resolve any.
+    if (kComputeEagerResolvedStrings) {
+      ComputeEagerResolvedStrings();
+    }
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -529,14 +534,6 @@
   return true;
 }
 
-// Count the number of strings in the heap and put the result in arg as a size_t pointer.
-static void CountStringsCallback(Object* obj, void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (obj->GetClass()->IsStringClass()) {
-    ++*reinterpret_cast<size_t*>(arg);
-  }
-}
-
 // Collect all the java.lang.String in the heap and put them in the output strings_ array.
 class StringCollector {
  public:
@@ -566,99 +563,19 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* lhs_s = lhs.AsMirrorPtr();
     mirror::String* rhs_s = rhs.AsMirrorPtr();
-    uint16_t* lhs_begin = lhs_s->GetCharArray()->GetData() + lhs_s->GetOffset();
-    uint16_t* rhs_begin = rhs_s->GetCharArray()->GetData() + rhs_s->GetOffset();
+    uint16_t* lhs_begin = lhs_s->GetValue();
+    uint16_t* rhs_begin = rhs_s->GetValue();
     return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(),
                                         rhs_begin, rhs_begin + rhs_s->GetLength());
   }
 };
 
-static bool IsPrefix(mirror::String* pref, mirror::String* full)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (pref->GetLength() > full->GetLength()) {
-    return false;
-  }
-  uint16_t* pref_begin = pref->GetCharArray()->GetData() + pref->GetOffset();
-  uint16_t* full_begin = full->GetCharArray()->GetData() + full->GetOffset();
-  return std::equal(pref_begin, pref_begin + pref->GetLength(), full_begin);
-}
-
-void ImageWriter::ProcessStrings() {
-  size_t total_strings = 0;
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  // Count the strings.
-  heap->VisitObjects(CountStringsCallback, &total_strings);
-  Thread* self = Thread::Current();
-  StackHandleScope<1> hs(self);
-  auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
-  StringCollector string_collector(strings, 0U);
-  // Read strings into the array.
-  heap->VisitObjects(StringCollector::Callback, &string_collector);
-  // Some strings could have gotten freed if AllocStringArray caused a GC.
-  CHECK_LE(string_collector.GetIndex(), total_strings);
-  total_strings = string_collector.GetIndex();
-  auto* strings_begin = reinterpret_cast<mirror::HeapReference<mirror::String>*>(
-          strings->GetRawData(sizeof(mirror::HeapReference<mirror::String>), 0));
-  std::sort(strings_begin, strings_begin + total_strings, LexicographicalStringComparator());
-  // Characters of strings which are non equal prefix of another string (not the same string).
-  // We don't count the savings from equal strings since these would get interned later anyways.
-  size_t prefix_saved_chars = 0;
-  // Count characters needed for the strings.
-  size_t num_chars = 0u;
-  mirror::String* prev_s = nullptr;
-  for (size_t idx = 0; idx != total_strings; ++idx) {
-    mirror::String* s = strings->GetWithoutChecks(idx);
-    size_t length = s->GetLength();
-    num_chars += length;
-    if (prev_s != nullptr && IsPrefix(prev_s, s)) {
-      size_t prev_length = prev_s->GetLength();
-      num_chars -= prev_length;
-      if (prev_length != length) {
-        prefix_saved_chars += prev_length;
-      }
-    }
-    prev_s = s;
-  }
-  // Create character array, copy characters and point the strings there.
-  mirror::CharArray* array = mirror::CharArray::Alloc(self, num_chars);
-  string_data_array_ = array;
-  uint16_t* array_data = array->GetData();
-  size_t pos = 0u;
-  prev_s = nullptr;
-  for (size_t idx = 0; idx != total_strings; ++idx) {
-    mirror::String* s = strings->GetWithoutChecks(idx);
-    uint16_t* s_data = s->GetCharArray()->GetData() + s->GetOffset();
-    int32_t s_length = s->GetLength();
-    int32_t prefix_length = 0u;
-    if (idx != 0u && IsPrefix(prev_s, s)) {
-      prefix_length = prev_s->GetLength();
-    }
-    memcpy(array_data + pos, s_data + prefix_length, (s_length - prefix_length) * sizeof(*s_data));
-    s->SetOffset(pos - prefix_length);
-    s->SetArray(array);
-    pos += s_length - prefix_length;
-    prev_s = s;
-  }
-  CHECK_EQ(pos, num_chars);
-
-  if (kIsDebugBuild || VLOG_IS_ON(compiler)) {
-    LOG(INFO) << "Total # image strings=" << total_strings << " combined length="
-        << num_chars << " prefix saved chars=" << prefix_saved_chars;
-  }
-  // Calling this can in theory fill in some resolved strings. However, in practice it seems to
-  // never resolve any.
-  if (kComputeEagerResolvedStrings) {
-    ComputeEagerResolvedStrings();
-  }
-}
-
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
   if (!obj->GetClass()->IsStringClass()) {
     return;
   }
   mirror::String* string = obj->AsString();
-  const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset();
+  const uint16_t* utf16_string = string->GetValue();
   size_t utf16_length = static_cast<size_t>(string->GetLength());
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index a2d99ee..c0cffa5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -220,9 +220,6 @@
   static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Combine string char arrays.
-  void ProcessStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static bool NonImageClassesVisitor(mirror::Class* c, void* arg)
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 925b507..dbdcc96 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -176,7 +176,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(92 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(111 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index d2d38da..15b4017 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1112,13 +1112,14 @@
   return offset;
 }
 
-bool OatWriter::Write(OutputStream* out) {
+bool OatWriter::WriteRodata(OutputStream* out) {
   const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
   if (raw_file_offset == (off_t) -1) {
     LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
     return false;
   }
   const size_t file_offset = static_cast<size_t>(raw_file_offset);
+  oat_data_offset_ = file_offset;
 
   // Reserve space for header. It will be written last - after updating the checksum.
   size_t header_size = oat_header_->GetHeaderSize();
@@ -1146,6 +1147,27 @@
     return false;
   }
 
+  // Write padding.
+  off_t new_offset = out->Seek(size_executable_offset_alignment_, kSeekCurrent);
+  relative_offset += size_executable_offset_alignment_;
+  DCHECK_EQ(relative_offset, oat_header_->GetExecutableOffset());
+  size_t expected_file_offset = file_offset + relative_offset;
+  if (static_cast<uint32_t>(new_offset) != expected_file_offset) {
+    PLOG(ERROR) << "Failed to seek to oat code section. Actual: " << new_offset
+                << " Expected: " << expected_file_offset << " File: " << out->GetLocation();
+    return false;
+  }
+  DCHECK_OFFSET();
+
+  return true;
+}
+
+bool OatWriter::WriteCode(OutputStream* out) {
+  size_t header_size = oat_header_->GetHeaderSize();
+  const size_t file_offset = oat_data_offset_;
+  size_t relative_offset = oat_header_->GetExecutableOffset();
+  DCHECK_OFFSET();
+
   relative_offset = WriteCode(out, file_offset, relative_offset);
   if (relative_offset == 0) {
     LOG(ERROR) << "Failed to write oat code to " << out->GetLocation();
@@ -1215,7 +1237,7 @@
     PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
     return false;
   }
-  DCHECK_EQ(raw_file_offset, out->Seek(0, kSeekCurrent));
+  DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
   if (!out->WriteFully(oat_header_, header_size)) {
     PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
     return false;
@@ -1290,16 +1312,6 @@
 }
 
 size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  off_t new_offset = out->Seek(size_executable_offset_alignment_, kSeekCurrent);
-  relative_offset += size_executable_offset_alignment_;
-  DCHECK_EQ(relative_offset, oat_header_->GetExecutableOffset());
-  size_t expected_file_offset = file_offset + relative_offset;
-  if (static_cast<uint32_t>(new_offset) != expected_file_offset) {
-    PLOG(ERROR) << "Failed to seek to oat code section. Actual: " << new_offset
-                << " Expected: " << expected_file_offset << " File: " << out->GetLocation();
-    return 0;
-  }
-  DCHECK_OFFSET();
   if (compiler_driver_->IsImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
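A small numeric sketch of the padding check that moved from WriteCode into WriteRodata above: once the earlier contents and the alignment padding have been written, the stream must sit exactly at file_offset + GetExecutableOffset(), so WriteCode can start directly at that position. The numbers below are illustrative, not taken from a real oat file.

#include <cassert>
#include <cstddef>

int main() {
  const size_t file_offset = 0x1000;             // Assumed start of oat data in the ELF file.
  const size_t executable_offset = 0x3000;       // Assumed page-aligned start of the code section.
  const size_t written_before_padding = 0x2f40;  // Assumed bytes written before the padding seek.
  const size_t padding = executable_offset - written_before_padding;
  const size_t new_offset = file_offset + written_before_padding + padding;
  // Mirrors the check in WriteRodata: the next write lands on the executable offset.
  assert(new_offset == file_offset + executable_offset);
  return 0;
}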
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 8c79b44..6f1b4ec 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -118,11 +118,8 @@
     return it.first->second.get();
   }
 
-  void SetOatDataOffset(size_t oat_data_offset) {
-    oat_data_offset_ = oat_data_offset;
-  }
-
-  bool Write(OutputStream* out);
+  bool WriteRodata(OutputStream* out);
+  bool WriteCode(OutputStream* out);
 
   ~OatWriter();
 
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 9a92151..8100a29 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -18,6 +18,26 @@
 
 namespace art {
 
+void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) {
+  DCHECK(block->EndsWithIf());
+
+  // Check if the condition is a Boolean negation.
+  HIf* if_instruction = block->GetLastInstruction()->AsIf();
+  HInstruction* boolean_not = if_instruction->InputAt(0);
+  if (!boolean_not->IsBooleanNot()) {
+    return;
+  }
+
+  // Make BooleanNot's input the condition of the If and swap branches.
+  if_instruction->ReplaceInput(boolean_not->InputAt(0), 0);
+  block->SwapSuccessors();
+
+  // Remove the BooleanNot if it is now unused.
+  if (!boolean_not->HasUses()) {
+    boolean_not->GetBlock()->RemoveInstruction(boolean_not);
+  }
+}
+
 // Returns true if 'block1' and 'block2' are empty, merge into the same single
 // successor and the successor can only be reached from them.
 static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
@@ -78,58 +98,69 @@
   }
 }
 
+void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
+  DCHECK(block->EndsWithIf());
+
+  // Find elements of the pattern.
+  HIf* if_instruction = block->GetLastInstruction()->AsIf();
+  HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+  HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+  if (!BlocksDoMergeTogether(true_block, false_block)) {
+    return;
+  }
+  HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+  if (!merge_block->HasSinglePhi()) {
+    return;
+  }
+  HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
+  HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
+  HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+
+  // Check if the selection negates/preserves the value of the condition and
+  // if so, generate a suitable replacement instruction.
+  HInstruction* if_condition = if_instruction->InputAt(0);
+  HInstruction* replacement;
+  if (NegatesCondition(true_value, false_value)) {
+    replacement = GetOppositeCondition(if_condition);
+    if (replacement->GetBlock() == nullptr) {
+      block->InsertInstructionBefore(replacement, if_instruction);
+    }
+  } else if (PreservesCondition(true_value, false_value)) {
+    replacement = if_condition;
+  } else {
+    return;
+  }
+
+  // Replace the selection outcome with the new instruction.
+  phi->ReplaceWith(replacement);
+  merge_block->RemovePhi(phi);
+
+  // Delete the true branch and merge the resulting chain of blocks
+  // 'block->false_block->merge_block' into one.
+  true_block->DisconnectAndDelete();
+  block->MergeWith(false_block);
+  block->MergeWith(merge_block);
+
+  // Remove the original condition if it is now unused.
+  if (!if_condition->HasUses()) {
+    if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition);
+  }
+}
+
 void HBooleanSimplifier::Run() {
   // Iterate in post order in the unlikely case that removing one occurrence of
-  // the pattern empties a branch block of another occurrence. Otherwise the
-  // order does not matter.
+  // the selection pattern empties a branch block of another occurrence.
+  // Otherwise the order does not matter.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (!block->EndsWithIf()) continue;
 
-    // Find elements of the pattern.
-    HIf* if_instruction = block->GetLastInstruction()->AsIf();
-    HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
-    HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
-    if (!BlocksDoMergeTogether(true_block, false_block)) {
-      continue;
-    }
-    HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
-    if (!merge_block->HasSinglePhi()) {
-      continue;
-    }
-    HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
-    HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
-    HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+    // If condition is negated, remove the negation and swap the branches.
+    TryRemovingNegatedCondition(block);
 
-    // Check if the selection negates/preserves the value of the condition and
-    // if so, generate a suitable replacement instruction.
-    HInstruction* if_condition = if_instruction->InputAt(0);
-    HInstruction* replacement;
-    if (NegatesCondition(true_value, false_value)) {
-      replacement = GetOppositeCondition(if_condition);
-      if (replacement->GetBlock() == nullptr) {
-        block->InsertInstructionBefore(replacement, if_instruction);
-      }
-    } else if (PreservesCondition(true_value, false_value)) {
-      replacement = if_condition;
-    } else {
-      continue;
-    }
-
-    // Replace the selection outcome with the new instruction.
-    phi->ReplaceWith(replacement);
-    merge_block->RemovePhi(phi);
-
-    // Delete the true branch and merge the resulting chain of blocks
-    // 'block->false_block->merge_block' into one.
-    true_block->DisconnectAndDelete();
-    block->MergeWith(false_block);
-    block->MergeWith(merge_block);
-
-    // Remove the original condition if it is now unused.
-    if (!if_condition->HasUses()) {
-      if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition);
-    }
+    // If this is a boolean-selection diamond pattern, replace its result with
+    // the condition value (or its negation) and simplify the graph.
+    TryRemovingBooleanSelection(block);
   }
 }
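A source-level C++ analogue (not compiler IR) of what TryRemovingNegatedCondition does: a branch on a negated Boolean becomes a branch on the Boolean itself with the two successors swapped, after which the negation is dead and can be removed.

#include <cstdio>

static void Before(bool flag) {
  bool boolean_not = !flag;    // HBooleanNot
  if (boolean_not) {           // HIf tests the negation.
    std::puts("A");
  } else {
    std::puts("B");
  }
}

static void After(bool flag) {
  if (flag) {                  // HIf now tests the original value...
    std::puts("B");            // ...with the true/false successors swapped.
  } else {
    std::puts("A");
  }
}

int main() {
  Before(true);  // prints B
  After(true);   // also prints B; behavior is unchanged.
  return 0;
}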
 
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
index a88733e..733ebaa 100644
--- a/compiler/optimizing/boolean_simplifier.h
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -14,11 +14,15 @@
  * limitations under the License.
  */
 
-// This optimization recognizes a common pattern where a boolean value is
-// either cast to an integer or negated by selecting from zero/one integer
-// constants with an If statement. Because boolean values are internally
-// represented as zero/one, we can safely replace the pattern with a suitable
-// condition instruction.
+// This optimization recognizes two common patterns:
+//  (a) Boolean selection: Casting a boolean to an integer or negating it is
+//      carried out with an If statement selecting from zero/one integer
+//      constants. Because Boolean values are represented as zero/one, the
+//      pattern can be replaced with the condition instruction itself or its
+//      negation, depending on the layout.
+//  (b) Negated condition: The instruction simplifier may replace an If's condition
+//      with a boolean value. If this value is the result of a Boolean negation,
+//      the true/false branches can be swapped and the negation removed.
 
 // Example: Negating a boolean value
 //     B1:
@@ -66,6 +70,9 @@
   static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
 
  private:
+  void TryRemovingNegatedCondition(HBasicBlock* block);
+  void TryRemovingBooleanSelection(HBasicBlock* block);
+
   DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
 };
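
For context, here is a minimal source-level sketch (plain C++, not ART code) of the two shapes the pass now rewrites; the function names are made up for illustration only.

// Illustrative sketch only: the source-level view of the two patterns
// HBooleanSimplifier targets.
#include <cstdio>

// (a) Boolean selection: the diamond `cond ? 1 : 0` (or `cond ? 0 : 1`)
// collapses to the condition itself (or its negation), since booleans are
// already materialized as 0/1.
int BoolToInt(bool cond) {
  return cond ? 1 : 0;   // after the pass: simply `cond`
}
int NegateBool(bool cond) {
  return cond ? 0 : 1;   // after the pass: the opposite condition, i.e. `!cond`
}

// (b) Negated condition: branching on `!cond` is rewritten to branch on `cond`
// with the true/false successors swapped, so the negation disappears.
const char* Describe(bool cond) {
  if (!cond) {           // after the pass: `if (cond)` with the branches swapped
    return "false case";
  }
  return "true case";
}

int main() {
  std::printf("%d %d %s\n", BoolToInt(true), NegateBool(true), Describe(false));
  return 0;
}
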
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 3645f19..92fa6db 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -246,6 +246,141 @@
   int32_t constant_;
 };
 
+// Collect array access data for a loop.
+// TODO: make it work for multiple arrays inside the loop.
+class ArrayAccessInsideLoopFinder : public ValueObject {
+ public:
+  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
+      : induction_variable_(induction_variable),
+        found_array_length_(nullptr),
+        offset_low_(INT_MAX),
+        offset_high_(INT_MIN) {
+    Run();
+  }
+
+  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
+  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
+  int32_t GetOffsetLow() const { return offset_low_; }
+  int32_t GetOffsetHigh() const { return offset_high_; }
+
+  // Returns whether `block`, which must be inside loop_info, may exit the loop.
+  // Exiting from the loop header of loop_info is considered normal and is not counted.
+  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
+    DCHECK(loop_info->Contains(*block));
+    if (block == loop_info->GetHeader()) {
+      // Loop header of loop_info. Exiting loop is normal.
+      return false;
+    }
+    const GrowableArray<HBasicBlock*> successors = block->GetSuccessors();
+    for (size_t i = 0; i < successors.Size(); i++) {
+      if (!loop_info->Contains(*successors.Get(i))) {
+        // One of the successors exits the loop.
+        return true;
+      }
+    }
+    return false;
+  }
+
+  void Run() {
+    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
+    // Must be simplified loop.
+    DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U);
+    for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
+      HBasicBlock* block = it_loop.Current();
+      DCHECK(block->IsInLoop());
+      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0);
+      if (!block->Dominates(back_edge)) {
+        // In order not to trigger deoptimization unnecessarily, make sure
+        // that all array accesses collected are really executed in the loop.
+        // For array accesses in a branch inside the loop, don't collect the
+        // access. The bounds check in that branch might not be eliminated.
+        continue;
+      }
+      if (EarlyExit(block, loop_info)) {
+        // If the loop body can exit the loop early (e.g. via break or return), it's
+        // not guaranteed that the loop iterates through the full monotonic value
+        // range from initial_ to end_. Adding deoptimization would then be too
+        // aggressive and could trigger deoptimization unnecessarily even if the loop
+        // never actually throws AIOOBE. Otherwise, the induction variable covers the
+        // full monotonic value range from initial_ to end_, and deoptimizations are
+        // added iff the loop would throw AIOOBE.
+        found_array_length_ = nullptr;
+        return;
+      }
+      for (HInstruction* instruction = block->GetFirstInstruction();
+           instruction != nullptr;
+           instruction = instruction->GetNext()) {
+        if (!instruction->IsArrayGet() && !instruction->IsArraySet()) {
+          continue;
+        }
+        HInstruction* index = instruction->InputAt(1);
+        if (!index->IsBoundsCheck()) {
+          continue;
+        }
+
+        HArrayLength* array_length = index->InputAt(1)->AsArrayLength();
+        if (array_length == nullptr) {
+          DCHECK(index->InputAt(1)->IsIntConstant());
+          // TODO: may optimize for constant case.
+          continue;
+        }
+
+        HInstruction* array = array_length->InputAt(0);
+        if (array->IsNullCheck()) {
+          array = array->AsNullCheck()->InputAt(0);
+        }
+        if (loop_info->Contains(*array->GetBlock())) {
+          // Array is defined inside the loop. Skip.
+          continue;
+        }
+
+        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
+          // An access to a different array has already been recorded for this loop.
+          // TODO: handle multiple arrays.
+          continue;
+        }
+
+        index = index->AsBoundsCheck()->InputAt(0);
+        HInstruction* left = index;
+        int32_t right = 0;
+        if (left == induction_variable_ ||
+            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
+             left == induction_variable_)) {
+          // For patterns like array[i] or array[i + 2].
+          if (right < offset_low_) {
+            offset_low_ = right;
+          }
+          if (right > offset_high_) {
+            offset_high_ = right;
+          }
+        } else {
+          // Access not in induction_variable/(induction_variable_ + constant)
+          // format. Skip.
+          continue;
+        }
+        // Record this array.
+        found_array_length_ = array_length;
+      }
+    }
+  }
+
+ private:
+  // The instruction that corresponds to a MonotonicValueRange.
+  HInstruction* induction_variable_;
+
+  // The array length of the array that's accessed inside the loop.
+  HArrayLength* found_array_length_;
+
+  // The lowest and highest constant offsets, relative to the induction variable
+  // induction_variable_, across all collected array accesses.
+  // If the array accesses are array[i-1], array[i] and array[i+1],
+  // offset_low_ is -1 and offset_high_ is 1.
+  int32_t offset_low_;
+  int32_t offset_high_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
+};
+
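
A standalone sketch (not the ART class) of the offset collection described above; the container of constants stands in for the in-loop accesses of the form array[i + c].

// Sketch: how the lowest/highest constant offsets relative to the induction
// variable are gathered. For a[i-1], a[i] and a[i+1] this yields
// offset_low = -1 and offset_high = 1.
#include <algorithm>
#include <climits>
#include <cstdio>
#include <vector>

struct OffsetRange {
  int low = INT_MAX;
  int high = INT_MIN;
};

// `offsets` stands in for the constants c of every in-loop access a[i + c].
OffsetRange CollectOffsets(const std::vector<int>& offsets) {
  OffsetRange range;
  for (int c : offsets) {
    range.low = std::min(range.low, c);
    range.high = std::max(range.high, c);
  }
  return range;
}

int main() {
  OffsetRange r = CollectOffsets({-1, 0, 1});  // a[i-1], a[i], a[i+1]
  std::printf("offset_low=%d offset_high=%d\n", r.low, r.high);
  return 0;
}
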
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -332,21 +467,31 @@
 class MonotonicValueRange : public ValueRange {
  public:
   MonotonicValueRange(ArenaAllocator* allocator,
+                      HPhi* induction_variable,
                       HInstruction* initial,
                       int32_t increment,
                       ValueBound bound)
       // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
       // used as a regular value range, due to possible overflow/underflow.
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
+        induction_variable_(induction_variable),
         initial_(initial),
+        end_(nullptr),
+        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
+  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
-
   ValueBound GetBound() const { return bound_; }
+  void SetEnd(HInstruction* end) { end_ = end; }
+  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
+  HBasicBlock* GetLoopHead() const {
+    DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
+    return induction_variable_->GetBlock();
+  }
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
@@ -371,6 +516,10 @@
     if (increment_ > 0) {
       // Monotonically increasing.
       ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower());
+      if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) {
+        // Lower bound isn't useful. Leave it to deoptimization.
+        return this;
+      }
 
       // We currently conservatively assume max array length is INT_MAX. If we can
       // make assumptions about the max array length, e.g. due to the max heap size,
@@ -417,6 +566,11 @@
       DCHECK_NE(increment_, 0);
       // Monotonically decreasing.
       ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
+      if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) &&
+          !upper.IsRelatedToArrayLength()) {
+        // Upper bound isn't useful. Leave it to deoptimization.
+        return this;
+      }
 
       // Need to take care of underflow. Try to prove underflow won't happen
       // for common cases.
@@ -432,10 +586,217 @@
     }
   }
 
+  // Returns true if adding a (constant >= value) check for deoptimization
+  // is allowed and will benefit compiled code.
+  bool CanAddDeoptimizationConstant(HInstruction* value,
+                                    int32_t constant,
+                                    bool* is_proven) {
+    *is_proven = false;
+    // See if we can prove the relationship first.
+    if (value->IsIntConstant()) {
+      if (value->AsIntConstant()->GetValue() >= constant) {
+        // Already true.
+        *is_proven = true;
+        return true;
+      } else {
+        // May throw exception. Don't add deoptimization.
+        // Keep bounds checks in the loops.
+        return false;
+      }
+    }
+    // Can benefit from deoptimization.
+    return true;
+  }
+
+  // Adds a check that (value >= constant), and HDeoptimize otherwise.
+  void AddDeoptimizationConstant(HInstruction* value,
+                                 int32_t constant) {
+    HBasicBlock* block = induction_variable_->GetBlock();
+    DCHECK(block->IsLoopHeader());
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+    HIntConstant* const_instr = graph->GetIntConstant(constant);
+    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
+    HDeoptimize* deoptimize = new (graph->GetArena())
+        HDeoptimize(cond, suspend_check->GetDexPc());
+    pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend_check->GetEnvironment(), block);
+  }
+
+  // Returns true if adding a (value <= array_length + offset) check for deoptimization
+  // is allowed and will benefit compiled code.
+  bool CanAddDeoptimizationArrayLength(HInstruction* value,
+                                       HArrayLength* array_length,
+                                       int32_t offset,
+                                       bool* is_proven) {
+    *is_proven = false;
+    if (offset > 0) {
+      // There might be overflow issue.
+      // TODO: handle this, possibly with some distance relationship between
+      // offset_low and offset_high, or using another deoptimization to make
+      // sure (array_length + offset) doesn't overflow.
+      return false;
+    }
+
+    // See if we can prove the relationship first.
+    if (value == array_length) {
+      if (offset >= 0) {
+        // Already true.
+        *is_proven = true;
+        return true;
+      } else {
+        // May throw exception. Don't add deoptimization.
+        // Keep bounds checks in the loops.
+        return false;
+      }
+    }
+    // Can benefit from deoptimization.
+    return true;
+  }
+
+  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
+  void AddDeoptimizationArrayLength(HInstruction* value,
+                                    HArrayLength* array_length,
+                                    int32_t offset) {
+    HBasicBlock* block = induction_variable_->GetBlock();
+    DCHECK(block->IsLoopHeader());
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+
+    // We may need to hoist null-check and array_length out of loop first.
+    if (!array_length->GetBlock()->Dominates(pre_header)) {
+      HInstruction* array = array_length->InputAt(0);
+      HNullCheck* null_check = array->AsNullCheck();
+      if (null_check != nullptr) {
+        array = null_check->InputAt(0);
+      }
+      // We've already made sure array is defined before the loop when collecting
+      // array accesses for the loop.
+      DCHECK(array->GetBlock()->Dominates(pre_header));
+      if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) {
+        // Hoist null check out of loop with a deoptimization.
+        HNullConstant* null_constant = graph->GetNullConstant();
+        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
+        // TODO: for one dex_pc, share the same deoptimization slow path.
+        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
+            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
+        pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction());
+        pre_header->InsertInstructionBefore(
+            null_check_deoptimize, pre_header->GetLastInstruction());
+        // Eliminate null check in the loop.
+        null_check->ReplaceWith(array);
+        null_check->GetBlock()->RemoveInstruction(null_check);
+        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+            suspend_check->GetEnvironment(), block);
+      }
+      // Hoist array_length out of loop.
+      array_length->MoveBefore(pre_header->GetLastInstruction());
+    }
+
+    HIntConstant* offset_instr = graph->GetIntConstant(offset);
+    HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
+    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add);
+    HDeoptimize* deoptimize = new (graph->GetArena())
+        HDeoptimize(cond, suspend_check->GetDexPc());
+    pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend_check->GetEnvironment(), block);
+  }
+
+  // Add deoptimizations in the loop pre-header, using the collected array access
+  // data, so that value ranges can be established in the loop body.
+  // Returns true if deoptimizations are successfully added, or if it's proven
+  // that they are not necessary.
+  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
+    int32_t offset_low = finder.GetOffsetLow();
+    int32_t offset_high = finder.GetOffsetHigh();
+    HArrayLength* array_length = finder.GetFoundArrayLength();
+
+    HBasicBlock* pre_header =
+        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
+    if (!initial_->GetBlock()->Dominates(pre_header) ||
+        !end_->GetBlock()->Dominates(pre_header)) {
+      // Can't move initial_ or end_ into pre_header for comparisons.
+      return false;
+    }
+
+    bool is_constant_proven, is_length_proven;
+    if (increment_ == 1) {
+      // Increasing from initial_ to end_.
+      int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high;
+      if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) &&
+          CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) {
+        if (!is_constant_proven) {
+          AddDeoptimizationConstant(initial_, -offset_low);
+        }
+        if (!is_length_proven) {
+          AddDeoptimizationArrayLength(end_, array_length, offset);
+        }
+        return true;
+      }
+    } else if (increment_ == -1) {
+      // Decreasing from initial_ to end_.
+      int32_t constant = inclusive_ ? -offset_low : -offset_low - 1;
+      if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) &&
+          CanAddDeoptimizationArrayLength(
+              initial_, array_length, -offset_high - 1, &is_length_proven)) {
+        if (!is_constant_proven) {
+          AddDeoptimizationConstant(end_, constant);
+        }
+        if (!is_length_proven) {
+          AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1);
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
+  ValueRange* NarrowWithDeoptimization() {
+    if (increment_ != 1 && increment_ != -1) {
+      // TODO: possibly handle overflow/underflow issues with deoptimization.
+      return this;
+    }
+
+    if (end_ == nullptr) {
+      // No full info to add deoptimization.
+      return this;
+    }
+
+    ArrayAccessInsideLoopFinder finder(induction_variable_);
+
+    if (!finder.HasFoundArrayLength()) {
+      // No array access was found inside the loop that can benefit
+      // from deoptimization.
+      return this;
+    }
+
+    if (!AddDeoptimization(finder)) {
+      return this;
+    }
+
+    // After the deoptimizations are added, the induction variable fits in
+    // [-offset_low, array.length - 1 - offset_high], adjusted by the collected offsets.
+    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
+    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
+    // We've narrowed the range after adding deoptimizations.
+    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
+  }
+
  private:
-  HInstruction* const initial_;
-  const int32_t increment_;
-  ValueBound bound_;  // Additional value bound info for initial_;
+  HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
+  HInstruction* const initial_;     // Initial value.
+  HInstruction* end_;               // End value.
+  bool inclusive_;                  // Whether end value is inclusive.
+  const int32_t increment_;         // Increment for each loop iteration.
+  const ValueBound bound_;          // Additional value bound info for initial_.
 
   DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
 };
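
To make the effect of NarrowWithDeoptimization concrete, here is a hedged source-level sketch (not ART code) for a simple increasing loop; Deoptimize() is a hypothetical stand-in for HDeoptimize, which would fall back to a fully checked execution path rather than throw.

// Source-level view of the guards hoisted into the loop pre-header.
#include <cstdio>
#include <vector>

static void Deoptimize() { /* bail out to a slower, fully checked path */ }

void FillChecked(std::vector<int>& a, int n) {
  // For `for (i = 0; i < n; ++i) a[i] = i;` (offset_low == offset_high == 0):
  //  - the lower-bound guard `initial_ >= -offset_low` is 0 >= 0, already proven,
  //    so nothing is emitted for it;
  //  - the upper-bound guard deoptimizes when end_ > array.length + offset.
  if (n > static_cast<int>(a.size())) Deoptimize();
  // With the guard in place, i stays within [0, a.length - 1], so the
  // per-iteration bounds checks inside the loop can be removed.
  for (int i = 0; i < n; ++i) {
    a[i] = i;
  }
}

int main() {
  std::vector<int> a(8);
  FillChecked(a, 8);
  std::printf("%d\n", a[7]);
  return 0;
}
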
@@ -598,6 +959,20 @@
     // There should be no critical edge at this point.
     DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u);
 
+    ValueRange* left_range = LookupValueRange(left, block);
+    MonotonicValueRange* left_monotonic_range = nullptr;
+    if (left_range != nullptr) {
+      left_monotonic_range = left_range->AsMonotonicValueRange();
+      if (left_monotonic_range != nullptr) {
+        HBasicBlock* loop_head = left_monotonic_range->GetLoopHead();
+        if (instruction->GetBlock() != loop_head) {
+          // For monotonic value range, don't handle `instruction`
+          // if it's not defined in the loop header.
+          return;
+        }
+      }
+    }
+
     bool found;
     ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found);
     // Each comparison can establish a lower bound and an upper bound
@@ -610,7 +985,6 @@
       ValueRange* right_range = LookupValueRange(right, block);
       if (right_range != nullptr) {
         if (right_range->IsMonotonicValueRange()) {
-          ValueRange* left_range = LookupValueRange(left, block);
           if (left_range != nullptr && left_range->IsMonotonicValueRange()) {
             HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond,
                                                    left_range->AsMonotonicValueRange(),
@@ -628,6 +1002,17 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
+      if (left_monotonic_range != nullptr) {
+        // Update the info for monotonic value range.
+        if (left_monotonic_range->GetInductionVariable() == left &&
+            left_monotonic_range->GetIncrement() < 0 &&
+            block == left_monotonic_range->GetLoopHead() &&
+            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+          left_monotonic_range->SetEnd(right);
+          left_monotonic_range->SetInclusive(cond == kCondLT);
+        }
+      }
+
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -651,6 +1036,17 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
+      if (left_monotonic_range != nullptr) {
+        // Update the info for monotonic value range.
+        if (left_monotonic_range->GetInductionVariable() == left &&
+            left_monotonic_range->GetIncrement() > 0 &&
+            block == left_monotonic_range->GetLoopHead() &&
+            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+          left_monotonic_range->SetEnd(right);
+          left_monotonic_range->SetInclusive(cond == kCondGT);
+        }
+      }
+
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -790,6 +1186,7 @@
             }
             range = new (GetGraph()->GetArena()) MonotonicValueRange(
                 GetGraph()->GetArena(),
+                phi,
                 initial_value,
                 increment,
                 bound);
@@ -809,6 +1206,36 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
+
+        HBasicBlock* block = instruction->GetBlock();
+        ValueRange* left_range = LookupValueRange(left, block);
+        if (left_range == nullptr) {
+          return;
+        }
+
+        if (left_range->IsMonotonicValueRange() &&
+            block == left_range->AsMonotonicValueRange()->GetLoopHead()) {
+          // The comparison is for an induction variable in the loop header.
+          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
+          HBasicBlock* loop_body_successor;
+          if (LIKELY(block->GetLoopInformation()->
+              Contains(*instruction->IfFalseSuccessor()))) {
+            loop_body_successor = instruction->IfFalseSuccessor();
+          } else {
+            loop_body_successor = instruction->IfTrueSuccessor();
+          }
+          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
+          if (new_left_range == left_range) {
+            // We are not successful in narrowing the monotonic value range to
+            // a regular value range. Try using deoptimization.
+            new_left_range = left_range->AsMonotonicValueRange()->
+                NarrowWithDeoptimization();
+            if (new_left_range != left_range) {
+              GetValueRangeMap(instruction->IfFalseSuccessor())->
+                  Overwrite(left->GetId(), new_left_range);
+            }
+          }
+        }
       }
     }
   }
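
A small sketch (not ART code) of the bound arithmetic that SetInclusive() feeds into AddDeoptimization() for an increasing loop: an inclusive end (`i <= n`) needs one extra slot of headroom compared with an exclusive end (`i < n`).

#include <cstdio>

// Offset added to array.length in the deoptimizing guard `end > length + offset`.
int GuardOffset(bool inclusive_end, int offset_high) {
  return inclusive_end ? -offset_high - 1 : -offset_high;
}

int main() {
  // Accesses up to a[i] (offset_high == 0):
  std::printf("i <  n: guard is n > length%+d\n", GuardOffset(false, 0));  // n > length+0
  std::printf("i <= n: guard is n > length%+d\n", GuardOffset(true, 0));   // n > length-1
  return 0;
}
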
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 96e08fd..0f44af0 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -19,6 +19,7 @@
 #include "art_field-inl.h"
 #include "base/logging.h"
 #include "class_linker.h"
+#include "dex/verified_method.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex/verified_method.h"
@@ -612,6 +613,16 @@
       HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
   // Potential class initialization check, in the case of a static method call.
   HClinitCheck* clinit_check = nullptr;
+  // Replace calls to String.<init> with StringFactory.
+  int32_t string_init_offset = 0;
+  bool is_string_init = compiler_driver_->IsStringInit(method_idx, dex_file_, &string_init_offset);
+  if (is_string_init) {
+    return_type = Primitive::kPrimNot;
+    is_instance_call = false;
+    number_of_arguments--;
+    invoke_type = kStatic;
+    optimized_invoke_type = kStatic;
+  }
 
   HInvoke* invoke = nullptr;
 
@@ -691,14 +702,14 @@
           current_block_->AddInstruction(load_class);
           clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
           current_block_->AddInstruction(clinit_check);
-          ++number_of_arguments;
         }
       }
     }
 
     invoke = new (arena_) HInvokeStaticOrDirect(
         arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
-        is_recursive, invoke_type, optimized_invoke_type, clinit_check_requirement);
+        is_recursive, string_init_offset, invoke_type, optimized_invoke_type,
+        clinit_check_requirement);
   }
 
   size_t start_index = 0;
@@ -714,6 +725,9 @@
 
   uint32_t descriptor_index = 1;
   uint32_t argument_index = start_index;
+  if (is_string_init) {
+    start_index = 1;
+  }
   for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
     Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
     bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
@@ -730,16 +744,38 @@
       i++;
     }
   }
+  DCHECK_EQ(argument_index, number_of_arguments);
 
   if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) {
     // Add the class initialization check as last input of `invoke`.
     DCHECK(clinit_check != nullptr);
-    invoke->SetArgumentAt(argument_index++, clinit_check);
+    invoke->SetArgumentAt(argument_index, clinit_check);
   }
 
-  DCHECK_EQ(argument_index, number_of_arguments);
   current_block_->AddInstruction(invoke);
   latest_result_ = invoke;
+
+  // Add move-result for StringFactory method.
+  if (is_string_init) {
+    uint32_t orig_this_reg = is_range ? register_index : args[0];
+    const VerifiedMethod* verified_method =
+        compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex());
+    if (verified_method == nullptr) {
+      LOG(WARNING) << "No verified method for method calling String.<init>: "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_);
+      return false;
+    }
+    const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
+        verified_method->GetStringInitPcRegMap();
+    auto map_it = string_init_map.find(dex_pc);
+    if (map_it != string_init_map.end()) {
+      std::set<uint32_t> reg_set = map_it->second;
+      for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+        UpdateLocal(*set_it, invoke);
+      }
+    }
+    UpdateLocal(orig_this_reg, invoke);
+  }
   return true;
 }
 
@@ -1916,12 +1952,19 @@
 
     case Instruction::NEW_INSTANCE: {
       uint16_t type_index = instruction.VRegB_21c();
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
-          ? kQuickAllocObjectWithAccessCheck
-          : kQuickAllocObject;
+      if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) {
+        // Turn new-instance of string into a const 0.
+        int32_t register_index = instruction.VRegA();
+        HNullConstant* constant = graph_->GetNullConstant();
+        UpdateLocal(register_index, constant);
+      } else {
+        QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+            ? kQuickAllocObjectWithAccessCheck
+            : kQuickAllocObject;
 
-      current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
-      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+        current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
+        UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      }
       break;
     }
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5163395..cfe121e 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -153,6 +153,7 @@
 }
 
 void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
+  is_baseline_ = is_baseline;
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
   GenerateFrameEntry();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e536b2d..bdbd571 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -105,6 +105,25 @@
   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
 };
 
+class InvokeDexCallingConventionVisitor {
+ public:
+  virtual Location GetNextLocation(Primitive::Type type) = 0;
+
+ protected:
+  InvokeDexCallingConventionVisitor() {}
+  virtual ~InvokeDexCallingConventionVisitor() {}
+
+  // The current index for core registers.
+  uint32_t gp_index_ = 0u;
+  // The current index for floating-point registers.
+  uint32_t float_index_ = 0u;
+  // The current stack index.
+  uint32_t stack_index_ = 0u;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
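
A standalone sketch (not the ART classes) of what such a calling-convention visitor does: hand out core registers, floating-point registers and stack slots in order as it walks the argument types. The register counts and string locations below are arbitrary, for illustration only.

#include <cstdint>
#include <cstdio>
#include <string>

class ToyCallingConventionVisitor {
 public:
  std::string GetNextLocation(bool is_fp) {
    if (is_fp) {
      if (float_index_ < kNumFpRegs) return "F" + std::to_string(float_index_++);
    } else {
      if (gp_index_ < kNumCoreRegs) return "R" + std::to_string(gp_index_++);
    }
    return "stack[" + std::to_string(stack_index_++) + "]";
  }

 private:
  static constexpr uint32_t kNumCoreRegs = 3;  // arbitrary for the sketch
  static constexpr uint32_t kNumFpRegs = 2;
  uint32_t gp_index_ = 0;
  uint32_t float_index_ = 0;
  uint32_t stack_index_ = 0;
};

int main() {
  ToyCallingConventionVisitor visitor;
  const bool args_are_fp[] = {false, true, false, true, true, false, false};
  for (bool is_fp : args_are_fp) {
    std::printf("%s ", visitor.GetNextLocation(is_fp).c_str());
  }
  std::printf("\n");
  return 0;
}
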
+
 class CodeGenerator {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -212,6 +231,10 @@
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
   void BuildStackMaps(std::vector<uint8_t>* vector);
 
+  bool IsBaseline() const {
+    return is_baseline_;
+  }
+
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -304,6 +327,7 @@
     return GetFpuSpillSize() + GetCoreSpillSize();
   }
 
+  virtual ParallelMoveResolver* GetMoveResolver() = 0;
 
  protected:
   CodeGenerator(HGraph* graph,
@@ -325,6 +349,7 @@
         number_of_register_pairs_(number_of_register_pairs),
         core_callee_save_mask_(core_callee_save_mask),
         fpu_callee_save_mask_(fpu_callee_save_mask),
+        is_baseline_(false),
         graph_(graph),
         compiler_options_(compiler_options),
         pc_infos_(graph->GetArena(), 32),
@@ -346,7 +371,6 @@
 
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
-  virtual ParallelMoveResolver* GetMoveResolver() = 0;
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
@@ -404,6 +428,9 @@
   const uint32_t core_callee_save_mask_;
   const uint32_t fpu_callee_save_mask_;
 
+  // Whether we are using baseline.
+  bool is_baseline_;
+
  private:
   void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 01748a9..e4c37de 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -605,7 +605,7 @@
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -680,7 +680,7 @@
   return Location();
 }
 
-Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1241,13 +1241,9 @@
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation, but this step is not
-  // run in baseline. So we remove them manually here if we find them.
-  // TODO: Instead of this local workaround, address this properly.
-  if (invoke->IsStaticWithExplicitClinitCheck()) {
-    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
-  }
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetInstructionSetFeatures());
@@ -1273,9 +1269,9 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation.
-  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -1292,8 +1288,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(R0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -4071,15 +4067,9 @@
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ LoadFromOffset(
-        kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-    // temp = temp[index_in_cache]
-    __ LoadFromOffset(
-        kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset());
     // LR = temp[offset_of_quick_compiled_code]
     __ LoadFromOffset(kLoadWord, LR, temp,
                       mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -4087,7 +4077,24 @@
     // LR()
     __ blx(LR);
   } else {
-    __ bl(GetFrameEntryLabel());
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ LoadFromOffset(
+          kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+      // temp = temp[index_in_cache]
+      __ LoadFromOffset(
+          kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+      // LR = temp[offset_of_quick_compiled_code]
+      __ LoadFromOffset(kLoadWord, LR, temp,
+                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                            kArmWordSize).Int32Value());
+      // LR()
+      __ blx(LR);
+    } else {
+      __ bl(GetFrameEntryLabel());
+    }
   }
 
   DCHECK(!IsLeafMethod());
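
The same dispatch choice is repeated in the arm64, x86 and x86_64 backends below. A conceptual sketch only (not ART's code generators) of that choice; all types and fields here are hypothetical stand-ins for the thread-local entrypoint slot and the dex cache:

#include <cstddef>
#include <cstdio>
#include <vector>

using EntryPoint = void (*)();

struct Method {
  EntryPoint entry_point;  // stand-in for entry_point_from_quick_compiled_code_
};

struct Thread {
  Method* string_init_method;  // stand-in for the thread-local string_init slot
};

EntryPoint ResolveTarget(bool is_string_init, bool is_recursive,
                         const Thread& thread, const std::vector<Method*>& dex_cache,
                         size_t index_in_cache, EntryPoint frame_entry) {
  if (is_string_init) {
    // temp = thread->string_init_entrypoint; call temp's compiled code
    return thread.string_init_method->entry_point;
  }
  if (is_recursive) {
    return frame_entry;  // branch directly to this method's own frame entry
  }
  // temp = caller->dex_cache_resolved_methods_[index_in_cache]; call temp's compiled code
  return dex_cache[index_in_cache]->entry_point;
}

static void Hello() { std::printf("hello\n"); }

int main() {
  Method factory{&Hello};
  Thread thread{&factory};
  EntryPoint target = ResolveTarget(true, false, thread, {}, 0, nullptr);
  target();
  return 0;
}
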
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 6009036..1a498e1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -78,22 +78,19 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor()
-      : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM() {}
+  virtual ~InvokeDexCallingConventionVisitorARM() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
-  uint32_t gp_index_;
-  uint32_t float_index_;
-  uint32_t double_index_;
-  uint32_t stack_index_;
+  uint32_t double_index_ = 0;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM);
 };
 
 class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
@@ -151,7 +148,7 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorARM* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index dada4ce..9e02a1d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -372,15 +372,15 @@
 
 #undef __
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
   Location next_location;
   if (type == Primitive::kPrimVoid) {
     LOG(FATAL) << "Unreachable type " << type;
   }
 
   if (Primitive::IsFloatingPointType(type) &&
-      (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) {
-    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++));
+      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
+    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
   } else if (!Primitive::IsFloatingPointType(type) &&
              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
@@ -1907,8 +1907,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(LocationFrom(x0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -1968,13 +1968,9 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation, but this step is not
-  // run in baseline. So we remove them manually here if we find them.
-  // TODO: Instead of this local workaround, address this properly.
-  if (invoke->IsStaticWithExplicitClinitCheck()) {
-    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
-  }
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -2006,29 +2002,39 @@
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
-    // temp = temp[index_in_cache];
-    __ Ldr(temp, HeapOperand(temp, index_in_cache));
-    // lr = temp->entry_point_from_quick_compiled_code_;
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ Ldr(temp, HeapOperand(tr, invoke->GetStringInitOffset()));
+    // LR = temp->entry_point_from_quick_compiled_code_;
     __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
         kArm64WordSize)));
-    // lr();
+    // lr()
     __ Blr(lr);
   } else {
-    __ Bl(&frame_entry_label_);
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
+      // temp = temp[index_in_cache];
+      __ Ldr(temp, HeapOperand(temp, index_in_cache));
+      // lr = temp->entry_point_from_quick_compiled_code_;
+      __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kArm64WordSize)));
+      // lr();
+      __ Blr(lr);
+    } else {
+      __ Bl(&frame_entry_label_);
+    }
   }
 
   DCHECK(!IsLeafMethod());
 }
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation.
-  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5a35867..8aeea54 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -119,25 +119,20 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM64() {}
+  virtual ~InvokeDexCallingConventionVisitorARM64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type return_type) {
     return calling_convention.GetReturnLocation(return_type);
   }
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for core registers.
-  uint32_t gp_index_;
-  // The current index for floating-point registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
 };
 
 class InstructionCodeGeneratorARM64 : public HGraphVisitor {
@@ -193,7 +188,7 @@
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 04999be..5ee091f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -551,7 +551,7 @@
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -582,7 +582,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -592,7 +592,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1194,13 +1194,9 @@
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation, but this step is not
-  // run in baseline. So we remove them manually here if we find them.
-  // TODO: Instead of this local workaround, address this properly.
-  if (invoke->IsStaticWithExplicitClinitCheck()) {
-    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
-  }
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1220,9 +1216,9 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation.
-  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -1242,8 +1238,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(EAX));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
@@ -3114,18 +3110,27 @@
   // 3) app -> app
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
-    // temp = temp[index_in_cache]
-    __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset()));
     // (temp + offset_of_quick_compiled_code)()
     __ call(Address(
         temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
   } else {
-    __ call(GetFrameEntryLabel());
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+      // temp = temp[index_in_cache]
+      __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+      // (temp + offset_of_quick_compiled_code)()
+      __ call(Address(temp,
+          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+    } else {
+      __ call(GetFrameEntryLabel());
+    }
   }
 
   DCHECK(!IsLeafMethod());
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8bd3cd3..79dec7a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -75,22 +75,17 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86() {}
+  virtual ~InvokeDexCallingConventionVisitorX86() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
 };
 
 class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
@@ -137,7 +132,7 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorX86* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5ce9329..5ac6866 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -366,18 +366,26 @@
   //
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
-  // temp = method;
-  LoadCurrentMethod(temp);
-  if (!invoke->IsRecursive()) {
-    // temp = temp->dex_cache_resolved_methods_;
-    __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-    // temp = temp[index_in_cache]
-    __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+  if (invoke->IsStringInit()) {
+    // temp = thread->string_init_entrypoint
+    __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset()));
     // (temp + offset_of_quick_compiled_code)()
     __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
         kX86_64WordSize).SizeValue()));
   } else {
-    __ call(&frame_entry_label_);
+    // temp = method;
+    LoadCurrentMethod(temp);
+    if (!invoke->IsRecursive()) {
+      // temp = temp->dex_cache_resolved_methods_;
+      __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+      // temp = temp[index_in_cache]
+      __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+      // (temp + offset_of_quick_compiled_code)()
+      __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kX86_64WordSize).SizeValue()));
+    } else {
+      __ call(&frame_entry_label_);
+    }
   }
 
   DCHECK(!IsLeafMethod());
@@ -1232,7 +1240,7 @@
   codegen_->GenerateFrameExit();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1262,7 +1270,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1272,7 +1280,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1289,13 +1297,9 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation, but this step is not
-  // run in baseline. So we remove them manually here if we find them.
-  // TODO: Instead of this local workaround, address this properly.
-  if (invoke->IsStaticWithExplicitClinitCheck()) {
-    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
-  }
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1315,9 +1319,9 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // Explicit clinit checks triggered by static invokes must have been
-  // pruned by art::PrepareForRegisterAllocation.
-  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -1334,8 +1338,8 @@
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(RDI));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 6cdc822..13f9c46 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -37,7 +37,7 @@
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
@@ -68,22 +68,17 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86_64() {}
+  virtual ~InvokeDexCallingConventionVisitorX86_64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
 };
 
 class CodeGeneratorX86_64;
@@ -147,7 +142,7 @@
   void HandleFieldGet(HInstruction* instruction);
 
   CodeGeneratorX86_64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index dc3124b..8ea8f3c 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -170,7 +170,8 @@
     }
   }
 
-  // Ensure the uses of `instruction` are defined in a block of the graph.
+  // Ensure the uses of `instruction` are defined in a block of the graph,
+  // and that their entries in the use list are consistent.
   for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
        !use_it.Done(); use_it.Advance()) {
     HInstruction* use = use_it.Current()->GetUser();
@@ -184,6 +185,27 @@
                             use->GetId(),
                             instruction->GetId()));
     }
+    size_t use_index = use_it.Current()->GetIndex();
+    if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
+      AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
+                            "UseListNode index.",
+                            use->DebugName(),
+                            use->GetId(),
+                            instruction->GetId()));
+    }
+  }
+
+  // Ensure the environment-use list entries are consistent.
+  for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+       !use_it.Done(); use_it.Advance()) {
+    HEnvironment* use = use_it.Current()->GetUser();
+    size_t use_index = use_it.Current()->GetIndex();
+    if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) {
+      AddError(StringPrintf("Environment user of %s:%d has a wrong "
+                            "UseListNode index.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
   }
 
   // Ensure 'instruction' has pointers to its inputs' use entries.
@@ -191,7 +213,11 @@
     HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
     HInstruction* input = input_record.GetInstruction();
     HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
-    if (use_node == nullptr || !input->GetUses().Contains(use_node)) {
+    // Check the use node for null before dereferencing it for its index.
+    if ((use_node == nullptr)
+        || !input->GetUses().Contains(use_node)
+        || (use_node->GetIndex() >= e)
+        || (use_node->GetIndex() != i)) {
       AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
                             "at input %u (%s:%d).",
                             instruction->DebugName(),
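
The new checks pin down a simple invariant between def-use chains and input lists, roughly:

    // For every definition `def` and every node `n` in def->GetUses():
    //   n->GetUser()->InputAt(n->GetIndex()) == def
    // and, symmetrically, for every input slot i of an instruction `user`:
    //   user->InputRecordAt(i).GetUseNode()->GetIndex() == i

Any transformation that shifts inputs around must now renumber the surviving use nodes; that is what the HPhi::RemoveInputAt change and the new HUseListNode::SetIndex() in nodes.cc/nodes.h further down are for.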
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2df7c16..e79d4f4 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -377,15 +377,42 @@
     return;
   }
 
-  if ((input_cst != nullptr) && input_cst->IsMinusOne() &&
-      (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) {
+  if ((input_cst != nullptr) && input_cst->IsMinusOne()) {
     // Replace code looking like
     //    DIV dst, src, -1
     // with
     //    NEG dst, src
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
-        instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+        instruction, new (GetGraph()->GetArena()) HNeg(type, input_other));
     RecordSimplification();
+    return;
+  }
+
+  if ((input_cst != nullptr) && Primitive::IsFloatingPointType(type)) {
+    // Try replacing code looking like
+    //    DIV dst, src, constant
+    // with
+    //    MUL dst, src, 1 / constant
+    HConstant* reciprocal = nullptr;
+    if (type == Primitive::kPrimDouble) {
+      double value = input_cst->AsDoubleConstant()->GetValue();
+      if (CanDivideByReciprocalMultiplyDouble(bit_cast<int64_t, double>(value))) {
+        reciprocal = GetGraph()->GetDoubleConstant(1.0 / value);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimFloat);
+      float value = input_cst->AsFloatConstant()->GetValue();
+      if (CanDivideByReciprocalMultiplyFloat(bit_cast<int32_t, float>(value))) {
+        reciprocal = GetGraph()->GetFloatConstant(1.0f / value);
+      }
+    }
+
+    if (reciprocal != nullptr) {
+      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
+          instruction, new (GetGraph()->GetArena()) HMul(type, input_other, reciprocal));
+      RecordSimplification();
+      return;
+    }
   }
 }
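
The multiply-by-reciprocal rewrite above is only safe when 1/constant is exactly representable, i.e. the constant is a power of two whose reciprocal is still a normal IEEE-754 number; otherwise DIV by the constant and MUL by the rounded reciprocal can give different results. The CanDivideByReciprocalMultiply* helpers live elsewhere in the tree; a minimal sketch of the float variant under that reading (not the actual helper):

    // `value_bits` is the raw binary32 encoding, as produced by the bit_cast
    // in the caller above.
    static bool CanDivideByReciprocalMultiplyFloat(int32_t value_bits) {
      if ((value_bits & 0x007fffff) != 0) {
        return false;  // Non-zero mantissa: not a power of two, so 1/c rounds.
      }
      int32_t exponent = (value_bits >> 23) & 0xff;
      // Exclude zero/denormals (0) and inf/NaN (255), and keep only exponents
      // whose reciprocal still lands in the normal range.
      return exponent >= 1 && exponent <= 253;
    }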
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 20aa45f..43fe374 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -186,6 +186,8 @@
       return Intrinsics::kStringCharAt;
     case kIntrinsicCompareTo:
       return Intrinsics::kStringCompareTo;
+    case kIntrinsicGetCharsNoCheck:
+      return Intrinsics::kStringGetCharsNoCheck;
     case kIntrinsicIsEmptyOrLength:
       // The inliner can handle these two cases - and this is the preferred approach
       // since after inlining the call is no longer visible (as opposed to waiting
@@ -194,6 +196,12 @@
     case kIntrinsicIndexOf:
       return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
           Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+    case kIntrinsicNewStringFromBytes:
+      return Intrinsics::kStringNewStringFromBytes;
+    case kIntrinsicNewStringFromChars:
+      return Intrinsics::kStringNewStringFromChars;
+    case kIntrinsicNewStringFromString:
+      return Intrinsics::kStringNewStringFromString;
 
     case kIntrinsicCas:
       switch (GetType(method.d.data, false)) {
@@ -280,6 +288,11 @@
     case kInlineOpIPut:
       return Intrinsics::kNone;
 
+    // String init cases, not intrinsics.
+
+    case kInlineStringInit:
+      return Intrinsics::kNone;
+
     // No default case to make the compiler warn on missing cases.
   }
   return Intrinsics::kNone;
@@ -361,4 +374,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index dbb7cba..c243ef3 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 
+#include "code_generator.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "parallel_move_resolver.h"
 
 namespace art {
 
@@ -76,6 +78,38 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
+  static void MoveArguments(HInvoke* invoke,
+                            CodeGenerator* codegen,
+                            InvokeDexCallingConventionVisitor* calling_convention_visitor) {
+    if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
+      HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+      // When we do not run baseline, explicit clinit checks triggered by static
+      // invokes must have been pruned by art::PrepareForRegisterAllocation.
+      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+    }
+
+    if (invoke->GetNumberOfArguments() == 0) {
+      // No argument to move.
+      return;
+    }
+
+    LocationSummary* locations = invoke->GetLocations();
+
+    // We're moving potentially two or more locations to locations that could overlap, so we need
+    // a parallel move resolver.
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+
+    for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
+      HInstruction* input = invoke->InputAt(i);
+      Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType());
+      Location actual_loc = locations->InAt(i);
+
+      parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
+    }
+
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+
  protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index abdf04e..7f7b450 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -48,7 +48,7 @@
 
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (Primitive::IsIntegralType(type)) {
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
     if (type == Primitive::kPrimLong) {
       Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
       Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
@@ -77,28 +77,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
-  if (invoke->InputCount() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -117,7 +98,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -810,10 +791,6 @@
   const MemberOffset value_offset = mirror::String::ValueOffset();
   // Location of count
   const MemberOffset count_offset = mirror::String::CountOffset();
-  // Starting offset within data array
-  const MemberOffset offset_offset = mirror::String::OffsetOffset();
-  // Start of char data with array_
-  const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
 
   Register obj = locations->InAt(0).AsRegister<Register>();  // String object pointer.
   Register idx = locations->InAt(1).AsRegister<Register>();  // Index of character.
@@ -835,15 +812,10 @@
   __ cmp(idx, ShifterOperand(temp));
   __ b(slow_path->GetEntryLabel(), CS);
 
-  // Index computation.
-  __ ldr(temp, Address(obj, offset_offset.Int32Value()));         // temp := str.offset.
-  __ ldr(array_temp, Address(obj, value_offset.Int32Value()));    // array_temp := str.offset.
-  __ add(temp, temp, ShifterOperand(idx));
-  DCHECK_EQ(data_offset.Int32Value() % 2, 0);                     // We'll compensate by shifting.
-  __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2));
+  __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value()));  // array_temp := str.value.
 
   // Load the value.
-  __ ldrh(out, Address(array_temp, temp, LSL, 1));                // out := array_temp[temp].
+  __ ldrh(out, Address(array_temp, idx, LSL, 1));                 // out := array_temp[idx].
 
   __ Bind(slow_path->GetExitLabel());
 }
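
This simplification (and the matching ARM64/x86/x86-64 ones below) follows from the character data now being embedded directly in the java.lang.String object at ValueOffset(), instead of living in a separate char[] reached through the value and offset fields. The address of character idx collapses from

    //   old:  value + data_offset + 2 * (str.offset + idx)
    //         (with `value` itself loaded from str + value_offset)
    //   new:  str + value_offset + 2 * idx

which is why the offset_offset/data_offset constants and the extra index arithmetic disappear.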
@@ -878,6 +850,81 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(Location::RegisterLocation(R0));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register byte_array = locations->InAt(0).AsRegister<Register>();
+  __ cmp(byte_array, ShifterOperand(0));
+  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ b(slow_path->GetEntryLabel(), EQ);
+
+  __ LoadFromOffset(
+      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ blx(LR);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(Location::RegisterLocation(R0));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+
+  __ LoadFromOffset(
+      kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ blx(LR);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(Location::RegisterLocation(R0));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) {
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+  __ cmp(string_to_copy, ShifterOperand(0));
+  SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ b(slow_path->GetEntryLabel(), EQ);
+
+  __ LoadFromOffset(kLoadWord,
+      LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ blx(LR);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -907,6 +954,7 @@
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7a753b2..ca3de99 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -75,7 +75,7 @@
 
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (Primitive::IsIntegralType(type)) {
+  if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
     Register trg_reg = RegisterFrom(trg, type);
     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
@@ -86,28 +86,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) {
-  if (invoke->InputCount() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -126,7 +107,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -953,10 +934,6 @@
   const MemberOffset value_offset = mirror::String::ValueOffset();
   // Location of count
   const MemberOffset count_offset = mirror::String::CountOffset();
-  // Starting offset within data array
-  const MemberOffset offset_offset = mirror::String::OffsetOffset();
-  // Start of char data with array_
-  const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
 
   Register obj = WRegisterFrom(locations->InAt(0));  // String object pointer.
   Register idx = WRegisterFrom(locations->InAt(1));  // Index of character.
@@ -979,21 +956,15 @@
   __ Cmp(idx, temp);
   __ B(hs, slow_path->GetEntryLabel());
 
-  // Index computation.
-  __ Ldr(temp, HeapOperand(obj, offset_offset));         // temp := str.offset.
-  __ Ldr(array_temp, HeapOperand(obj, value_offset));    // array_temp := str.offset.
-  __ Add(temp, temp, idx);
-  DCHECK_EQ(data_offset.Int32Value() % 2, 0);            // We'll compensate by shifting.
-  __ Add(temp, temp, Operand(data_offset.Int32Value() / 2));
+  __ Add(array_temp, obj, Operand(value_offset.Int32Value()));  // array_temp := str.value.
 
   // Load the value.
-  __ Ldrh(out, MemOperand(array_temp.X(), temp, UXTW, 1));  // out := array_temp[temp].
+  __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1));  // out := array_temp[idx].
 
   __ Bind(slow_path->GetExitLabel());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
-  // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
                                                             kIntrinsified);
@@ -1022,6 +993,84 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register byte_array = WRegisterFrom(locations->InAt(0));
+  __ Cmp(byte_array, 0);
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  __ Ldr(lr,
+      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Blr(lr);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+
+  __ Ldr(lr,
+      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Blr(lr);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
+  // The inputs plus the output.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register string_to_copy = WRegisterFrom(locations->InAt(0));
+  __ Cmp(string_to_copy, 0);
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  __ Ldr(lr,
+      MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Blr(lr);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -1034,6 +1083,7 @@
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 10f6e1d..2c9248f 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -60,8 +60,12 @@
   V(MemoryPokeShortNative, kStatic) \
   V(StringCharAt, kDirect) \
   V(StringCompareTo, kDirect) \
+  V(StringGetCharsNoCheck, kDirect) \
   V(StringIndexOf, kDirect) \
   V(StringIndexOfAfter, kDirect) \
+  V(StringNewStringFromBytes, kStatic) \
+  V(StringNewStringFromChars, kStatic) \
+  V(StringNewStringFromString, kStatic) \
   V(UnsafeCASInt, kDirect) \
   V(UnsafeCASLong, kDirect) \
   V(UnsafeCASObject, kDirect) \
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 7275edb..1eef1ef 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -111,28 +111,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
-  if (invoke->InputCount() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -155,7 +136,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
@@ -749,7 +730,7 @@
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
@@ -910,23 +891,18 @@
   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   Register obj = locations->InAt(0).AsRegister<Register>();
   Register idx = locations->InAt(1).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
-  Location temp_loc = locations->GetTemp(0);
-  Register temp = temp_loc.AsRegister<Register>();
 
   // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
   //       the cost.
   // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
   //       we will not optimize the code for constants (which would save a register).
 
-  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
+      invoke, locations->GetTemp(0).AsRegister<Register>());
   codegen_->AddSlowPath(slow_path);
 
   X86Assembler* assembler = GetAssembler();
@@ -935,12 +911,8 @@
   codegen_->MaybeRecordImplicitNullCheck(invoke);
   __ j(kAboveEqual, slow_path->GetEntryLabel());
 
-  // Get the actual element.
-  __ movl(temp, idx);                          // temp := idx.
-  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
-  __ movl(out, Address(obj, value_offset));    // obj := obj.array.
-  // out = out[2*temp].
-  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+  // out = out[2*idx].
+  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -976,6 +948,81 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+  // Needs to be EAX for the invoke.
+  locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register byte_array = locations->InAt(0).AsRegister<Register>();
+  __ testl(byte_array, byte_array);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
+      invoke, locations->GetTemp(0).AsRegister<Register>());
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(Location::RegisterLocation(EAX));
+  // Needs to be EAX for the invoke.
+  locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+  __ testl(string_to_copy, string_to_copy);
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
+      invoke, locations->GetTemp(0).AsRegister<Register>());
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
   Location out_loc = locations->Out();
@@ -1536,6 +1583,7 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 35daaf6..1fc5432 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -103,28 +103,9 @@
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
-  if (invoke->InputCount() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->InputCount(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -143,7 +124,7 @@
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -623,7 +604,7 @@
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -824,16 +805,10 @@
   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
   // Location of count
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-  // Starting offset within data array
-  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
-  // Start of char data with array_
-  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
 
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  Location temp_loc = locations->GetTemp(0);
-  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
 
   // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
   //       the cost.
@@ -849,12 +824,8 @@
   codegen_->MaybeRecordImplicitNullCheck(invoke);
   __ j(kAboveEqual, slow_path->GetEntryLabel());
 
-  // Get the actual element.
-  __ movl(temp, idx);                          // temp := idx.
-  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
-  __ movl(out, Address(obj, value_offset));    // obj := obj.array.
-  // out = out[2*temp].
-  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+  // out = out[2*idx].
+  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -887,6 +858,78 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
+  __ testl(byte_array, byte_array);
+  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ gs()->call(Address::Absolute(
+        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+
+  __ gs()->call(Address::Absolute(
+        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
+  __ testl(string_to_copy, string_to_copy);
+  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  __ gs()->call(Address::Absolute(
+        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
@@ -1390,6 +1433,7 @@
 void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 286ffa8..d3ee770 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -303,25 +303,6 @@
   return cached_null_constant_;
 }
 
-template <class InstructionType, typename ValueType>
-InstructionType* HGraph::CreateConstant(ValueType value,
-                                        ArenaSafeMap<ValueType, InstructionType*>* cache) {
-  // Try to find an existing constant of the given value.
-  InstructionType* constant = nullptr;
-  auto cached_constant = cache->find(value);
-  if (cached_constant != cache->end()) {
-    constant = cached_constant->second;
-  }
-
-  // If not found or previously deleted, create and cache a new instruction.
-  if (constant == nullptr || constant->GetBlock() == nullptr) {
-    constant = new (arena_) InstructionType(value);
-    cache->Overwrite(value, constant);
-    InsertConstant(constant);
-  }
-  return constant;
-}
-
 HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) {
   switch (type) {
     case Primitive::Type::kPrimBoolean:
@@ -343,6 +324,18 @@
   }
 }
 
+void HGraph::CacheFloatConstant(HFloatConstant* constant) {
+  int32_t value = bit_cast<int32_t, float>(constant->GetValue());
+  DCHECK(cached_float_constants_.find(value) == cached_float_constants_.end());
+  cached_float_constants_.Overwrite(value, constant);
+}
+
+void HGraph::CacheDoubleConstant(HDoubleConstant* constant) {
+  int64_t value = bit_cast<int64_t, double>(constant->GetValue());
+  DCHECK(cached_double_constants_.find(value) == cached_double_constants_.end());
+  cached_double_constants_.Overwrite(value, constant);
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
@@ -455,6 +448,20 @@
   instructions_.InsertInstructionBefore(instruction, cursor);
 }
 
+void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(!cursor->IsPhi());
+  DCHECK(!instruction->IsPhi());
+  DCHECK_EQ(instruction->GetId(), -1);
+  DCHECK_NE(cursor->GetId(), -1);
+  DCHECK_EQ(cursor->GetBlock(), this);
+  DCHECK(!instruction->IsControlFlow());
+  DCHECK(!cursor->IsControlFlow());
+  instruction->SetBlock(this);
+  instruction->SetId(GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(instruction);
+  instructions_.InsertInstructionAfter(instruction, cursor);
+}
+
 void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
   DCHECK_EQ(phi->GetId(), -1);
   DCHECK_NE(cursor->GetId(), -1);
@@ -506,6 +513,28 @@
   }
 }
 
+void HEnvironment::CopyFromWithLoopPhiAdjustment(HEnvironment* env,
+                                                 HBasicBlock* loop_header) {
+  DCHECK(loop_header->IsLoopHeader());
+  for (size_t i = 0; i < env->Size(); i++) {
+    HInstruction* instruction = env->GetInstructionAt(i);
+    SetRawEnvAt(i, instruction);
+    if (instruction == nullptr) {
+      continue;
+    }
+    if (instruction->IsLoopHeaderPhi() && (instruction->GetBlock() == loop_header)) {
+      // At the end of the loop pre-header, the corresponding value for instruction
+      // is the first input of the phi.
+      HInstruction* initial = instruction->AsPhi()->InputAt(0);
+      DCHECK(initial->GetBlock()->Dominates(loop_header));
+      SetRawEnvAt(i, initial);
+      initial->AddEnvUseAt(this, i);
+    } else {
+      instruction->AddEnvUseAt(this, i);
+    }
+  }
+}
+
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
   const HUserRecord<HEnvironment*> user_record = vregs_.Get(index);
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
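
A concrete picture of the adjustment above, for an environment copied from inside a loop onto an instruction placed in the loop pre-header (values are only for illustration):

    //   loop header:  i = Phi(i0, i_next)    // i0 flows in from the pre-header
    //   inside loop:  env = [ ..., i, ... ]  // the environment references the phi
    //
    // The phi's value does not exist yet in the pre-header, so the copy
    // substitutes the phi's first input (which must dominate the header,
    // hence the DCHECK above):
    //   pre-header:   env = [ ..., i0, ... ]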
@@ -683,6 +712,9 @@
 void HPhi::RemoveInputAt(size_t index) {
   RemoveAsUserOfInput(index);
   inputs_.DeleteAt(index);
+  for (size_t i = index, e = InputCount(); i < e; ++i) {
+    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  }
 }
 
 #define DEFINE_ACCEPT(name, super)                                             \
@@ -1303,9 +1335,10 @@
       current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue()));
     } else if (current->IsLongConstant()) {
       current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue()));
-    } else if (current->IsFloatConstant() || current->IsDoubleConstant()) {
-      // TODO: Don't duplicate floating-point constants.
-      current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
+    } else if (current->IsFloatConstant()) {
+      current->ReplaceWith(outer_graph->GetFloatConstant(current->AsFloatConstant()->GetValue()));
+    } else if (current->IsDoubleConstant()) {
+      current->ReplaceWith(outer_graph->GetDoubleConstant(current->AsDoubleConstant()->GetValue()));
     } else if (current->IsParameterValue()) {
       if (kIsDebugBuild
           && invoke->IsInvokeStaticOrDirect()
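
With float and double constants now cached on HGraph, inlining can map the inner graph's constants onto the outer graph's instead of moving them, as the last hunk shows. The caches are keyed on raw bit patterns rather than on floating-point values; a short sketch of why that matters (the helper name is made up for illustration):

    static int32_t FloatCacheKey(float value) {
      // +0.0f and -0.0f compare equal yet are distinct constants, and a NaN
      // never compares equal to itself, so a map keyed on `float` would either
      // merge constants that must stay apart or never find a cached NaN again.
      // The binary32 bit pattern has neither problem.
      return bit_cast<int32_t, float>(value);
    }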
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index a5da615..63f3c95 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -132,7 +132,9 @@
         current_instruction_id_(start_instruction_id),
         cached_null_constant_(nullptr),
         cached_int_constants_(std::less<int32_t>(), arena->Adapter()),
-        cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {}
+        cached_float_constants_(std::less<int32_t>(), arena->Adapter()),
+        cached_long_constants_(std::less<int64_t>(), arena->Adapter()),
+        cached_double_constants_(std::less<int64_t>(), arena->Adapter()) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
   const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -241,8 +243,8 @@
   bool IsDebuggable() const { return debuggable_; }
 
   // Returns a constant of the given type and value. If it does not exist
-  // already, it is created and inserted into the graph. Only integral types
-  // are currently supported.
+  // already, it is created and inserted into the graph. This method is only for
+  // integral types.
   HConstant* GetConstant(Primitive::Type type, int64_t value);
   HNullConstant* GetNullConstant();
   HIntConstant* GetIntConstant(int32_t value) {
@@ -251,6 +253,12 @@
   HLongConstant* GetLongConstant(int64_t value) {
     return CreateConstant(value, &cached_long_constants_);
   }
+  HFloatConstant* GetFloatConstant(float value) {
+    return CreateConstant(bit_cast<int32_t, float>(value), &cached_float_constants_);
+  }
+  HDoubleConstant* GetDoubleConstant(double value) {
+    return CreateConstant(bit_cast<int64_t, double>(value), &cached_double_constants_);
+  }
 
   HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
 
@@ -265,10 +273,34 @@
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
 
-  template <class InstType, typename ValueType>
-  InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache);
+  template <class InstructionType, typename ValueType>
+  InstructionType* CreateConstant(ValueType value,
+                                  ArenaSafeMap<ValueType, InstructionType*>* cache) {
+    // Try to find an existing constant of the given value.
+    InstructionType* constant = nullptr;
+    auto cached_constant = cache->find(value);
+    if (cached_constant != cache->end()) {
+      constant = cached_constant->second;
+    }
+
+    // If not found or previously deleted, create and cache a new instruction.
+    if (constant == nullptr || constant->GetBlock() == nullptr) {
+      constant = new (arena_) InstructionType(value);
+      cache->Overwrite(value, constant);
+      InsertConstant(constant);
+    }
+    return constant;
+  }
+
   void InsertConstant(HConstant* instruction);
 
+  // Cache a float constant into the graph. This method should only be
+  // called by the SsaBuilder when creating "equivalent" instructions.
+  void CacheFloatConstant(HFloatConstant* constant);
+
+  // See CacheFloatConstant comment.
+  void CacheDoubleConstant(HDoubleConstant* constant);
+
   ArenaAllocator* const arena_;
 
   // List of blocks in insertion order.
@@ -306,11 +338,14 @@
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int32_t current_instruction_id_;
 
-  // Cached common constants often needed by optimization passes.
+  // Cached constants.
   HNullConstant* cached_null_constant_;
   ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_;
+  ArenaSafeMap<int32_t, HFloatConstant*> cached_float_constants_;
   ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_;
+  ArenaSafeMap<int64_t, HDoubleConstant*> cached_double_constants_;
 
+  friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
@@ -362,6 +397,11 @@
     return back_edges_;
   }
 
+  HBasicBlock* GetSingleBackEdge() const {
+    DCHECK_EQ(back_edges_.Size(), 1u);
+    return back_edges_.Get(0);
+  }
+
   void ClearBackEdges() {
     back_edges_.Reset();
   }
@@ -526,6 +566,13 @@
     predecessors_.Put(1, temp);
   }
 
+  void SwapSuccessors() {
+    DCHECK_EQ(successors_.Size(), 2u);
+    HBasicBlock* temp = successors_.Get(0);
+    successors_.Put(0, successors_.Get(1));
+    successors_.Put(1, temp);
+  }
+
   size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
     for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
       if (predecessors_.Get(i) == predecessor) {
@@ -578,7 +625,9 @@
   void DisconnectAndDelete();
 
   void AddInstruction(HInstruction* instruction);
+  // Insert `instruction` before/after an existing instruction `cursor`.
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+  void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
@@ -801,13 +850,14 @@
   HUseListNode* GetNext() const { return next_; }
   T GetUser() const { return user_; }
   size_t GetIndex() const { return index_; }
+  void SetIndex(size_t index) { index_ = index; }
 
  private:
   HUseListNode(T user, size_t index)
       : user_(user), index_(index), prev_(nullptr), next_(nullptr) {}
 
   T const user_;
-  const size_t index_;
+  size_t index_;
   HUseListNode<T>* prev_;
   HUseListNode<T>* next_;
 
@@ -1013,6 +1063,10 @@
   }
 
   void CopyFrom(HEnvironment* env);
+  // Copy from `env`. If an entry is a loop phi of `loop_header`, copy that
+  // phi's first input instead. This is for inserting instructions that
+  // require an environment (like HDeoptimization) in the loop pre-header.
+  void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header);
 
   void SetRawEnvAt(size_t index, HInstruction* instruction) {
     vregs_.Put(index, HUserRecord<HEnvironment*>(instruction));
@@ -1037,7 +1091,7 @@
 
   GrowableArray<HUserRecord<HEnvironment*> > vregs_;
 
-  friend HInstruction;
+  friend class HInstruction;
 
   DISALLOW_COPY_AND_ASSIGN(HEnvironment);
 };
@@ -1248,6 +1302,13 @@
     environment_->CopyFrom(environment);
   }
 
+  void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment,
+                                                HBasicBlock* block) {
+    ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
+    environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+    environment_->CopyFromWithLoopPhiAdjustment(environment, block);
+  }
+
   // Returns the number of entries in the environment. Typically, that is the
   // number of dex registers in a method. It could be more in case of inlining.
   size_t EnvironmentSize() const;
@@ -2040,13 +2101,14 @@
 
  private:
   explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {}
+  explicit HFloatConstant(int32_t value)
+      : HConstant(Primitive::kPrimFloat), value_(bit_cast<float, int32_t>(value)) {}
 
   const float value_;
 
-  // Only the SsaBuilder can currently create floating-point constants. If we
-  // ever need to create them later in the pipeline, we will have to handle them
-  // the same way as integral constants.
+  // Only the SsaBuilder and HGraph can create floating-point constants.
   friend class SsaBuilder;
+  friend class HGraph;
   DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
 };
 
@@ -2077,13 +2139,14 @@
 
  private:
   explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {}
+  explicit HDoubleConstant(int64_t value)
+      : HConstant(Primitive::kPrimDouble), value_(bit_cast<double, int64_t>(value)) {}
 
   const double value_;
 
-  // Only the SsaBuilder can currently create floating-point constants. If we
-  // ever need to create them later in the pipeline, we will have to handle them
-  // the same way as integral constants.
+  // Only the SsaBuilder and HGraph can create floating-point constants.
   friend class SsaBuilder;
+  friend class HGraph;
   DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
 };
 
@@ -2180,6 +2243,12 @@
     SetRawInputAt(index, argument);
   }
 
+  // Return the number of arguments.  This number can be lower than
+  // the number of inputs returned by InputCount(), as some invoke
+  // instructions (e.g. HInvokeStaticOrDirect) can have non-argument
+  // inputs at the end of their list of inputs.
+  uint32_t GetNumberOfArguments() const { return number_of_arguments_; }
+
   Primitive::Type GetType() const OVERRIDE { return return_type_; }
 
   uint32_t GetDexPc() const { return dex_pc_; }
@@ -2199,16 +2268,19 @@
  protected:
   HInvoke(ArenaAllocator* arena,
           uint32_t number_of_arguments,
+          uint32_t number_of_other_inputs,
           Primitive::Type return_type,
           uint32_t dex_pc,
           uint32_t dex_method_index)
     : HInstruction(SideEffects::All()),
+      number_of_arguments_(number_of_arguments),
       inputs_(arena, number_of_arguments),
       return_type_(return_type),
       dex_pc_(dex_pc),
       dex_method_index_(dex_method_index),
       intrinsic_(Intrinsics::kNone) {
-    inputs_.SetSize(number_of_arguments);
+    uint32_t number_of_inputs = number_of_arguments + number_of_other_inputs;
+    inputs_.SetSize(number_of_inputs);
   }
 
   const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
@@ -2216,6 +2288,7 @@
     inputs_.Put(index, input);
   }
 
+  uint32_t number_of_arguments_;
   GrowableArray<HUserRecord<HInstruction*> > inputs_;
   const Primitive::Type return_type_;
   const uint32_t dex_pc_;
@@ -2242,14 +2315,21 @@
                         uint32_t dex_pc,
                         uint32_t dex_method_index,
                         bool is_recursive,
+                        int32_t string_init_offset,
                         InvokeType original_invoke_type,
                         InvokeType invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+      : HInvoke(arena,
+                number_of_arguments,
+                clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u,
+                return_type,
+                dex_pc,
+                dex_method_index),
         original_invoke_type_(original_invoke_type),
         invoke_type_(invoke_type),
         is_recursive_(is_recursive),
-        clinit_check_requirement_(clinit_check_requirement) {}
+        clinit_check_requirement_(clinit_check_requirement),
+        string_init_offset_(string_init_offset) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     UNUSED(obj);
@@ -2262,21 +2342,24 @@
   InvokeType GetInvokeType() const { return invoke_type_; }
   bool IsRecursive() const { return is_recursive_; }
   bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); }
+  bool IsStringInit() const { return string_init_offset_ != 0; }
+  int32_t GetStringInitOffset() const { return string_init_offset_; }
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
     return GetInvokeType() == kStatic;
   }
 
-  // Remove the art::HClinitCheck or art::HLoadClass instruction as
-  // last input (only relevant for static calls with explicit clinit
-  // check).
-  void RemoveClinitCheckOrLoadClassAsLastInput() {
+  // Remove the art::HLoadClass instruction set as last input by
+  // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of
+  // the initial art::HClinitCheck instruction (only relevant for
+  // static calls with explicit clinit check).
+  void RemoveLoadClassAsLastInput() {
     DCHECK(IsStaticWithExplicitClinitCheck());
     size_t last_input_index = InputCount() - 1;
     HInstruction* last_input = InputAt(last_input_index);
     DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName();
+    DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
     RemoveAsUserOfInput(last_input_index);
     inputs_.DeleteAt(last_input_index);
     clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
@@ -2317,6 +2400,9 @@
   const InvokeType invoke_type_;
   const bool is_recursive_;
   ClinitCheckRequirement clinit_check_requirement_;
+  // Thread entrypoint offset for string init method if this is a string init invoke.
+  // Note that there are multiple string init methods, each having its own offset.
+  int32_t string_init_offset_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
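A hypothetical consumer sketch, not ART code: a backend could branch on IsStringInit() and dispatch through the stored thread-local entrypoint offset instead of the usual dex-cache path. Only IsStringInit() and GetStringInitOffset() mirror the patch; everything else below is invented for illustration.

#include <cstdint>
#include <iostream>

struct InvokeInfoSketch {
  int32_t string_init_offset = 0;  // 0 means "not a string init", as in the patch
  uint32_t dex_method_index = 0;

  bool IsStringInit() const { return string_init_offset != 0; }
  int32_t GetStringInitOffset() const { return string_init_offset; }
};

void EmitCallSketch(const InvokeInfoSketch& invoke) {
  if (invoke.IsStringInit()) {
    // Each String.<init> overload has its own entrypoint, hence a per-invoke offset.
    std::cout << "call [thread + " << invoke.GetStringInitOffset() << "]\n";
  } else {
    std::cout << "call via dex cache, method #" << invoke.dex_method_index << "\n";
  }
}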
@@ -2329,7 +2415,7 @@
                  uint32_t dex_pc,
                  uint32_t dex_method_index,
                  uint32_t vtable_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+      : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index),
         vtable_index_(vtable_index) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -2355,7 +2441,7 @@
                    uint32_t dex_pc,
                    uint32_t dex_method_index,
                    uint32_t imt_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+      : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index),
         imt_index_(imt_index) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index fa6b3c2..78d1185 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -91,7 +91,7 @@
     // previously) by the graph builder during the creation of the
     // static invoke instruction, but is no longer required at this
     // stage (i.e., after inlining has been performed).
-    invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+    invoke->RemoveLoadClassAsLastInput();
 
     // If the load class instruction is no longer used, remove it from
     // the graph.
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 0fdf051..812642b 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1455,6 +1455,7 @@
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
+  UsePosition* env_use = current->GetFirstEnvironmentUse();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -1467,15 +1468,14 @@
     LiveRange* range = current->GetFirstRange();
     while (range != nullptr) {
       while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->GetIsEnvironment());
+        DCHECK(use->IsSynthesized());
         use = use->GetNext();
       }
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+        DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        LocationSummary* locations = use->GetUser()->GetLocations();
-        if (use->GetIsEnvironment()) {
-          locations->SetEnvironmentAt(use->GetInputIndex(), source);
-        } else {
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
           Location expected_location = locations->InAt(use->GetInputIndex());
           // The expected (actual) location may be invalid in case the input is unused. Currently
           // this only happens for intrinsics.
@@ -1492,6 +1492,19 @@
         }
         use = use->GetNext();
       }
+
+      // Walk over the environment uses, and update their locations.
+      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+        env_use = env_use->GetNext();
+      }
+
+      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+        DCHECK(current->CoversSlow(env_use->GetPosition()) || (env_use->GetPosition() == range->GetEnd()));
+        LocationSummary* locations = env_use->GetUser()->GetLocations();
+        locations->SetEnvironmentAt(env_use->GetInputIndex(), source);
+        env_use = env_use->GetNext();
+      }
+
       range = range->GetNext();
     }
 
@@ -1554,10 +1567,9 @@
   } while (current != nullptr);
 
   if (kIsDebugBuild) {
-    // Following uses can only be environment uses. The location for
-    // these environments will be none.
+    // Following uses can only be synthesized uses.
     while (use != nullptr) {
-      DCHECK(use->GetIsEnvironment());
+      DCHECK(use->IsSynthesized());
       use = use->GetNext();
     }
   }
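A simplified model of the two-list walk added above, under the assumption of stripped-down types (none of these are ART's): regular uses update input locations, environment uses update environment slots, and each list advances independently per live range.

#include <cstddef>
#include <vector>

struct Use {
  size_t position;
  bool synthesized;  // back-edge uses have no user instruction
  Use* next;
};

struct Range { size_t start, end; };

void ConnectSiblingsSketch(const std::vector<Range>& ranges, Use* use, Use* env_use) {
  for (const Range& range : ranges) {
    while (use != nullptr && use->position < range.start) use = use->next;  // skip stragglers
    while (use != nullptr && use->position <= range.end) {
      if (!use->synthesized) {
        // ... update the user's input location here ...
      }
      use = use->next;
    }
    // Environment uses live in their own list and are walked separately.
    while (env_use != nullptr && env_use->position < range.start) env_use = env_use->next;
    while (env_use != nullptr && env_use->position <= range.end) {
      // ... update the user's environment slot here ...
      env_use = env_use->next;
    }
  }
}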
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7a252af..b66e655 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -417,6 +417,7 @@
     ArenaAllocator* allocator = graph->GetArena();
     result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+    graph->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -439,6 +440,7 @@
     ArenaAllocator* allocator = graph->GetArena();
     result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+    graph->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
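A stand-alone sketch of the caching the two hunks above hook into, assuming a bit-pattern-keyed map; the real HGraph cache layout is not reproduced here, only the idea that a float constant is registered once and reused.

#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_map>

struct FloatConstantSketch { float value; };

class ConstantCacheSketch {
 public:
  FloatConstantSketch* GetOrCreate(float value) {
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits));  // bit_cast, so 0.0f and -0.0f stay distinct
    auto it = cache_.find(bits);
    if (it != cache_.end()) return it->second.get();
    std::unique_ptr<FloatConstantSketch> constant(new FloatConstantSketch{value});
    FloatConstantSketch* raw = constant.get();
    cache_.emplace(bits, std::move(constant));  // analogous to CacheFloatConstant(result)
    return raw;
  }
 private:
  std::unordered_map<uint32_t, std::unique_ptr<FloatConstantSketch>> cache_;
};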
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index ea0e7c3..0bbcb30 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -341,7 +341,7 @@
   size_t end = GetEnd();
   while (use != nullptr && use->GetPosition() <= end) {
     size_t use_position = use->GetPosition();
-    if (use_position >= start && !use->GetIsEnvironment()) {
+    if (use_position >= start && !use->IsSynthesized()) {
       HInstruction* user = use->GetUser();
       size_t input_index = use->GetInputIndex();
       if (user->IsPhi()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97254ed..b74e655 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -112,12 +112,15 @@
         is_environment_(is_environment),
         position_(position),
         next_(next) {
-    DCHECK(user->IsPhi()
+    DCHECK((user == nullptr)
+        || user->IsPhi()
         || (GetPosition() == user->GetLifetimePosition() + 1)
         || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
+  static constexpr size_t kNoInput = -1;
+
   size_t GetPosition() const { return position_; }
 
   UsePosition* GetNext() const { return next_; }
@@ -126,14 +129,16 @@
   HInstruction* GetUser() const { return user_; }
 
   bool GetIsEnvironment() const { return is_environment_; }
+  bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
 
   void Dump(std::ostream& stream) const {
     stream << position_;
-    if (is_environment_) {
-      stream << " (env)";
-    }
+  }
+
+  HLoopInformation* GetLoopInformation() const {
+    return user_->GetBlock()->GetLoopInformation();
   }
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
@@ -142,6 +147,15 @@
         next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
+  bool RequiresRegister() const {
+    if (GetIsEnvironment()) return false;
+    if (IsSynthesized()) return false;
+    Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
+    return location.IsUnallocated()
+        && (location.GetPolicy() == Location::kRequiresRegister
+            || location.GetPolicy() == Location::kRequiresFpuRegister);
+  }
+
  private:
   HInstruction* const user_;
   const size_t input_index_;
@@ -219,6 +233,7 @@
   void AddTempUse(HInstruction* instruction, size_t temp_index) {
     DCHECK(IsTemp());
     DCHECK(first_use_ == nullptr) << "A temporary can only have one user";
+    DCHECK(first_env_use_ == nullptr) << "A temporary cannot have an environment user";
     size_t position = instruction->GetLifetimePosition();
     first_use_ = new (allocator_) UsePosition(
         instruction, temp_index, /* is_environment */ false, position, first_use_);
@@ -239,9 +254,15 @@
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
         position = instruction->GetLifetimePosition();
+      } else if (!locations->InAt(input_index).IsValid()) {
+        return;
       }
     }
 
+    if (!is_environment && instruction->IsInLoop()) {
+      AddBackEdgeUses(*instruction->GetBlock());
+    }
+
     DCHECK(position == instruction->GetLifetimePosition()
            || position == instruction->GetLifetimePosition() + 1);
 
@@ -265,8 +286,13 @@
       return;
     }
 
-    first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, is_environment, position, first_use_);
+    if (is_environment) {
+      first_env_use_ = new (allocator_) UsePosition(
+          instruction, input_index, is_environment, position, first_env_use_);
+    } else {
+      first_use_ = new (allocator_) UsePosition(
+          instruction, input_index, is_environment, position, first_use_);
+    }
 
     if (is_environment && !keep_alive) {
       // If this environment use does not keep the instruction live, it does not
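A simplified sketch of the list split above (types are stripped down, not ART's): a new use is prepended to either the regular-use list or the environment-use list, so later passes can walk the two independently.

#include <cstddef>

struct UseNodeSketch {
  size_t position;
  UseNodeSketch* next;
};

struct IntervalSketch {
  UseNodeSketch* first_use = nullptr;
  UseNodeSketch* first_env_use = nullptr;

  void AddUse(size_t position, bool is_environment) {
    UseNodeSketch*& head = is_environment ? first_env_use : first_use;
    // Uses arrive in decreasing liveness order, so prepending keeps each
    // list sorted by increasing position.
    head = new UseNodeSketch{position, head};
  }
};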
@@ -300,6 +326,9 @@
 
   void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->IsPhi());
+    if (block->IsInLoop()) {
+      AddBackEdgeUses(*block);
+    }
     first_use_ = new (allocator_) UsePosition(
         instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
   }
@@ -450,38 +479,17 @@
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
     }
-    if (position == GetStart() && IsParent()) {
-      LocationSummary* locations = defined_by_->GetLocations();
-      Location location = locations->Out();
-      // This interval is the first interval of the instruction. If the output
-      // of the instruction requires a register, we return the position of that instruction
-      // as the first register use.
-      if (location.IsUnallocated()) {
-        if ((location.GetPolicy() == Location::kRequiresRegister)
-             || (location.GetPolicy() == Location::kSameAsFirstInput
-                 && (locations->InAt(0).IsRegister()
-                     || locations->InAt(0).IsRegisterPair()
-                     || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-          return position;
-        } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                   || (location.GetPolicy() == Location::kSameAsFirstInput
-                       && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
-          return position;
-        }
-      } else if (location.IsRegister() || location.IsRegisterPair()) {
-        return position;
-      }
+
+    if (IsDefiningPosition(position) && DefinitionRequiresRegister()) {
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
-      if (use_position > position && !use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        if (location.IsUnallocated()
-            && (location.GetPolicy() == Location::kRequiresRegister
-                || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+      if (use_position > position) {
+        if (use->RequiresRegister()) {
           return use_position;
         }
       }
@@ -499,21 +507,17 @@
       return position == GetStart() ? position : kNoLifetime;
     }
 
-    if (position == GetStart() && IsParent()) {
-      if (defined_by_->GetLocations()->Out().IsValid()) {
-        return position;
-      }
+    if (IsDefiningPosition(position)) {
+      DCHECK(defined_by_->GetLocations()->Out().IsValid());
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
-      if (!use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        size_t use_position = use->GetPosition();
-        if (use_position > position && location.IsValid()) {
-          return use_position;
-        }
+      size_t use_position = use->GetPosition();
+      if (use_position > position) {
+        return use_position;
       }
       use = use->GetNext();
     }
@@ -524,6 +528,10 @@
     return first_use_;
   }
 
+  UsePosition* GetFirstEnvironmentUse() const {
+    return first_env_use_;
+  }
+
   Primitive::Type GetType() const {
     return type_;
   }
@@ -577,6 +585,7 @@
     new_interval->parent_ = parent_;
 
     new_interval->first_use_ = first_use_;
+    new_interval->first_env_use_ = first_env_use_;
     LiveRange* current = first_range_;
     LiveRange* previous = nullptr;
     // Iterate over the ranges, and either find a range that covers this position, or
@@ -655,10 +664,18 @@
         stream << " ";
       } while ((use = use->GetNext()) != nullptr);
     }
+    stream << "}, { ";
+    use = first_env_use_;
+    if (use != nullptr) {
+      do {
+        use->Dump(stream);
+        stream << " ";
+      } while ((use = use->GetNext()) != nullptr);
+    }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
-    stream << " is_high: " << IsHighInterval();
     stream << " is_low: " << IsLowInterval();
+    stream << " is_high: " << IsHighInterval();
   }
 
   LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -754,6 +771,10 @@
     if (first_use_ != nullptr) {
       high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
     }
+
+    if (first_env_use_ != nullptr) {
+      high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_);
+    }
   }
 
   // Returns whether an interval, when it is non-split, is using
@@ -851,6 +872,7 @@
         first_safepoint_(nullptr),
         last_safepoint_(nullptr),
         first_use_(nullptr),
+        first_env_use_(nullptr),
         type_(type),
         next_sibling_(nullptr),
         parent_(this),
@@ -888,6 +910,100 @@
     return range;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. Return whether
+    // the output of the defining instruction requires a register, in which
+    // case the defining position is also the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+           || (location.GetPolicy() == Location::kSameAsFirstInput
+               && (locations->InAt(0).IsRegister()
+                   || locations->InAt(0).IsRegisterPair()
+                   || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
+  bool IsDefiningPosition(size_t position) const {
+    return IsParent() && (position == GetStart());
+  }
+
+  bool HasSynthesizeUseAt(size_t position) const {
+    UsePosition* use = first_use_;
+    while (use != nullptr) {
+      size_t use_position = use->GetPosition();
+      if ((use_position == position) && use->IsSynthesized()) {
+        return true;
+      }
+      if (use_position > position) break;
+      use = use->GetNext();
+    }
+    return false;
+  }
+
+  void AddBackEdgeUses(const HBasicBlock& block_at_use) {
+    DCHECK(block_at_use.IsInLoop());
+    // Add synthesized uses at the back edge of loops to help the register allocator.
+    // Note that this method is called in decreasing liveness order, to facilitate adding
+    // uses at the head of the `first_use_` linked list. Because below
+    // we iterate from inner-most to outer-most, which is in increasing liveness order,
+    // we need to take extra care with how the `first_use_` linked list is updated.
+    UsePosition* first_in_new_list = nullptr;
+    UsePosition* last_in_new_list = nullptr;
+    for (HLoopInformationOutwardIterator it(block_at_use);
+         !it.Done();
+         it.Advance()) {
+      HLoopInformation* current = it.Current();
+      if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) {
+        // This interval is defined in the loop. We can stop going outward.
+        break;
+      }
+
+      size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd();
+      if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+        // There was a use already seen in this loop. Therefore the previous call to `AddUse`
+        // already inserted the backedge use. We can stop going outward.
+        DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+        break;
+      }
+
+      DCHECK(last_in_new_list == nullptr
+             || back_edge_use_position > last_in_new_list->GetPosition());
+
+      UsePosition* new_use = new (allocator_) UsePosition(
+          nullptr, UsePosition::kNoInput, /* is_environment */ false,
+          back_edge_use_position, nullptr);
+
+      if (last_in_new_list != nullptr) {
+        // Going outward. The latest created use needs to point to the new use.
+        last_in_new_list->SetNext(new_use);
+      } else {
+        // This is the inner-most loop.
+        DCHECK_EQ(current, block_at_use.GetLoopInformation());
+        first_in_new_list = new_use;
+      }
+      last_in_new_list = new_use;
+    }
+    // Link the newly created linked list with `first_use_`.
+    if (last_in_new_list != nullptr) {
+      last_in_new_list->SetNext(first_use_);
+      first_use_ = first_in_new_list;
+    }
+  }
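A sketch of the invariant the synthesized uses rely on, with all types simplified (not ART code): a back-edge use carries a position but no user instruction, so any pass that dereferences the user must skip it, as the register-allocator DCHECKs earlier in this change now do.

#include <cstddef>

struct InstructionSketch { /* opaque for this sketch */ };

struct UsePositionSketch {
  InstructionSketch* user;  // nullptr marks a synthesized back-edge use
  size_t position;
  UsePositionSketch* next;

  bool IsSynthesized() const { return user == nullptr; }
};

void VisitRealUses(UsePositionSketch* use) {
  for (; use != nullptr; use = use->next) {
    if (use->IsSynthesized()) {
      continue;  // only extends the interval across the back edge; nothing to update
    }
    // ... safe to inspect use->user here ...
  }
}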
+
   ArenaAllocator* const allocator_;
 
   // Ranges of this interval. We need a quick access to the last range to test
@@ -905,6 +1021,7 @@
 
   // Uses of this interval. Note that this linked list is shared amongst siblings.
   UsePosition* first_use_;
+  UsePosition* first_env_use_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index f2e35af..b9a3d37 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -76,41 +76,38 @@
   "kClassRoots",
 };
 
-class OatSymbolizer FINAL : public CodeOutput {
+class OatSymbolizer FINAL {
  public:
-  explicit OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
-      oat_file_(oat_file), builder_(nullptr), elf_output_(nullptr),
-      output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
-  }
+  class RodataWriter FINAL : public CodeOutput {
+   public:
+    explicit RodataWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
 
-  bool Init() {
-    Elf32_Word oat_data_size = oat_file_->GetOatHeader().GetExecutableOffset();
-
-    uint32_t diff = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
-    uint32_t oat_exec_size = diff - oat_data_size;
-    uint32_t oat_bss_size = oat_file_->BssSize();
-
-    elf_output_ = OS::CreateEmptyFile(output_name_.c_str());
-
-    builder_.reset(new ElfBuilder<ElfTypes32>(
-        this,
-        elf_output_,
-        oat_file_->GetOatHeader().GetInstructionSet(),
-        0,
-        oat_data_size,
-        oat_data_size,
-        oat_exec_size,
-        RoundUp(oat_data_size + oat_exec_size, kPageSize),
-        oat_bss_size,
-        true,
-        false));
-
-    if (!builder_->Init()) {
-      builder_.reset(nullptr);
-      return false;
+    bool Write(OutputStream* out) OVERRIDE {
+      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+      return out->WriteFully(oat_file_->Begin(), rodata_size);
     }
 
-    return true;
+   private:
+    const OatFile* oat_file_;
+  };
+
+  class TextWriter FINAL : public CodeOutput {
+   public:
+    explicit TextWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
+
+    bool Write(OutputStream* out) OVERRIDE {
+      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+      const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
+      return out->WriteFully(text_begin, oat_file_->End() - text_begin);
+    }
+
+   private:
+    const OatFile* oat_file_;
+  };
+
+  explicit OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
+      oat_file_(oat_file), builder_(nullptr),
+      output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
   }
 
   typedef void (OatSymbolizer::*Callback)(const DexFile::ClassDef&,
@@ -122,9 +119,17 @@
                                           uint32_t);
 
   bool Symbolize() {
-    if (builder_.get() == nullptr) {
-      return false;
-    }
+    Elf32_Word rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+    uint32_t size = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
+    uint32_t text_size = size - rodata_size;
+    uint32_t bss_size = oat_file_->BssSize();
+    RodataWriter rodata_writer(oat_file_);
+    TextWriter text_writer(oat_file_);
+    builder_.reset(new ElfBuilder<ElfTypes32>(
+        oat_file_->GetOatHeader().GetInstructionSet(),
+        rodata_size, &rodata_writer,
+        text_size, &text_writer,
+        bss_size));
 
     Walk(&art::OatSymbolizer::RegisterForDedup);
 
@@ -132,10 +137,11 @@
 
     Walk(&art::OatSymbolizer::AddSymbol);
 
-    bool result = builder_->Write();
+    File* elf_output = OS::CreateEmptyFile(output_name_.c_str());
+    bool result = builder_->Write(elf_output);
 
     // Ignore I/O errors.
-    UNUSED(elf_output_->FlushClose());
+    UNUSED(elf_output->FlushClose());
 
     return result;
   }
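A stand-alone sketch of the split the new RodataWriter/TextWriter perform, using an assumed simplified oat view rather than the real OatFile API: everything before the executable offset is .rodata, everything from there to the end of the mapping is .text.

#include <cstddef>
#include <cstdint>
#include <utility>

struct OatImageSketch {
  const uint8_t* begin;
  const uint8_t* end;
  size_t executable_offset;  // plays the role of GetOatHeader().GetExecutableOffset()
};

std::pair<size_t, size_t> SectionSizesSketch(const OatImageSketch& oat) {
  size_t total = static_cast<size_t>(oat.end - oat.begin);
  size_t rodata_size = oat.executable_offset;
  size_t text_size = total - rodata_size;  // mirrors "size - rodata_size" in Symbolize()
  return {rodata_size, text_size};
}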
@@ -269,24 +275,14 @@
         pretty_name = "[Dedup]" + pretty_name;
       }
 
-      ElfSymtabBuilder<ElfTypes32>* symtab = builder_->GetSymtabBuilder();
+      auto* symtab = builder_->GetSymtab();
 
-      symtab->AddSymbol(pretty_name, &builder_->GetTextBuilder(),
+      symtab->AddSymbol(pretty_name, builder_->GetText(),
           oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(),
           true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC);
     }
   }
 
-  // Set oat data offset. Required by ElfBuilder/CodeOutput.
-  void SetCodeOffset(size_t offset ATTRIBUTE_UNUSED) {
-    // Nothing to do.
-  }
-
-  // Write oat code. Required by ElfBuilder/CodeOutput.
-  bool Write(OutputStream* out) {
-    return out->WriteFully(oat_file_->Begin(), oat_file_->End() - oat_file_->Begin());
-  }
-
  private:
   static void SkipAllFields(ClassDataItemIterator* it) {
     while (it->HasNextStaticField()) {
@@ -299,7 +295,6 @@
 
   const OatFile* oat_file_;
   std::unique_ptr<ElfBuilder<ElfTypes32> > builder_;
-  File* elf_output_;
   std::unordered_map<uint32_t, uint32_t> state_;
   const std::string output_name_;
 };
@@ -2203,10 +2198,6 @@
   }
 
   OatSymbolizer oat_symbolizer(oat_file, output_name);
-  if (!oat_symbolizer.Init()) {
-    fprintf(stderr, "Failed to initialize symbolizer\n");
-    return EXIT_FAILURE;
-  }
   if (!oat_symbolizer.Symbolize()) {
     fprintf(stderr, "Failed to symbolize\n");
     return EXIT_FAILURE;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 86201ba..ece9d4b 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -124,6 +124,7 @@
   native/java_lang_Object.cc \
   native/java_lang_Runtime.cc \
   native/java_lang_String.cc \
+  native/java_lang_StringFactory.cc \
   native/java_lang_System.cc \
   native/java_lang_Thread.cc \
   native/java_lang_Throwable.cc \
@@ -136,6 +137,7 @@
   native/java_lang_reflect_Method.cc \
   native/java_lang_reflect_Proxy.cc \
   native/java_util_concurrent_atomic_AtomicLong.cc \
+  native/libcore_util_CharsetUtils.cc \
   native/org_apache_harmony_dalvik_ddmc_DdmServer.cc \
   native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc \
   native/sun_misc_Unsafe.cc \
@@ -466,7 +468,7 @@
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libdl
     # ZipArchive support, the order matters here to get all symbols.
-    LOCAL_STATIC_LIBRARIES := libziparchive libz
+    LOCAL_STATIC_LIBRARIES := libziparchive libz libbase
     # For android::FileMap used by libziparchive.
     LOCAL_SHARED_LIBRARIES += libutils
     # For liblog, atrace, properties, ashmem, set_sched_policy and socket_peer_is_trusted.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 599c22a..7488578 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -705,6 +705,22 @@
 END \name
 .endm
 
+// Macro to facilitate adding new allocation entrypoints.
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    bl     \entrypoint
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -1188,8 +1204,7 @@
     .cfi_rel_offset r11, 8
     .cfi_rel_offset lr, 12
     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
-    ldr   r12, [r0, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr   r0, [r0, #MIRROR_STRING_VALUE_OFFSET]
+    add   r0, #MIRROR_STRING_VALUE_OFFSET
 
     /* Clamp start to [0..count] */
     cmp   r2, #0
@@ -1199,10 +1214,6 @@
     it    gt
     movgt r2, r3
 
-    /* Build a pointer to the start of string data */
-    add   r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-    add   r0, r0, r12, lsl #1
-
     /* Save a copy in r12 to later compute result */
     mov   r12, r0
 
@@ -1308,12 +1319,10 @@
     .cfi_rel_offset r12, 24
     .cfi_rel_offset lr, 28
 
-    ldr    r4, [r2, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr    r9, [r1, #MIRROR_STRING_OFFSET_OFFSET]
     ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
     ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    r2, [r2, #MIRROR_STRING_VALUE_OFFSET]
-    ldr    r1, [r1, #MIRROR_STRING_VALUE_OFFSET]
+    add    r2, #MIRROR_STRING_VALUE_OFFSET
+    add    r1, #MIRROR_STRING_VALUE_OFFSET
 
     /*
      * At this point, we have:
@@ -1328,15 +1337,12 @@
      it    ls
      movls r10, r7
 
-     /* Now, build pointers to the string data */
-     add   r2, r2, r4, lsl #1
-     add   r1, r1, r9, lsl #1
      /*
       * Note: data pointers point to previous element so we can use pre-index
       * mode with base writeback.
       */
-     add   r2, #MIRROR_CHAR_ARRAY_DATA_OFFSET-2   @ offset to contents[-1]
-     add   r1, #MIRROR_CHAR_ARRAY_DATA_OFFSET-2   @ offset to contents[-1]
+     subs  r2, #2   @ offset to contents[-1]
+     subs  r1, #2   @ offset to contents[-1]
 
      /*
       * At this point we have:
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 1e78877..1d316fc 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1264,7 +1264,7 @@
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
     mov    x1, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1276,7 +1276,7 @@
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
     mov    x2, xSELF                  // pass Thread::Current
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1284,11 +1284,11 @@
 END \name
 .endm
 
-// Macro to facilitate adding new array allocation entrypoints.
+// Macro to facilitate adding new allocation entrypoints.
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
     mov    x3, xSELF                  // pass Thread::Current
     bl     \entrypoint
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
@@ -1296,6 +1296,19 @@
 END \name
 .endm
 
+// Macro to facilitate adding new allocation entrypoints.
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
+    mov    x4, xSELF                  // pass Thread::Current
+    bl     \entrypoint                //
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    \return
+    DELIVER_PENDING_EXCEPTION
+END \name
+.endm
+
 // Macros taking opportunity of code similarities for downcalls with referrer.
 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
     .extern \entrypoint
@@ -1725,8 +1738,7 @@
      */
 ENTRY art_quick_indexof
     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
-    ldr   w4, [x0, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr   w0, [x0, #MIRROR_STRING_VALUE_OFFSET] // x0 ?
+    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
 
     /* Clamp start to [0..count] */
     cmp   w2, #0
@@ -1734,10 +1746,6 @@
     cmp   w2, w3
     csel  w2, w3, w2, gt
 
-    /* Build a pointer to the start of the string data */
-    add   x0, x0, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-    add   x0, x0, x4, lsl #1
-
     /* Save a copy to compute result */
     mov   x5, x0
 
@@ -1829,17 +1837,15 @@
     ret
 1:                        // Different string objects.
 
-    ldr    w6, [x2, #MIRROR_STRING_OFFSET_OFFSET]
-    ldr    w5, [x1, #MIRROR_STRING_OFFSET_OFFSET]
     ldr    w4, [x2, #MIRROR_STRING_COUNT_OFFSET]
     ldr    w3, [x1, #MIRROR_STRING_COUNT_OFFSET]
-    ldr    w2, [x2, #MIRROR_STRING_VALUE_OFFSET]
-    ldr    w1, [x1, #MIRROR_STRING_VALUE_OFFSET]
+    add    x2, x2, #MIRROR_STRING_VALUE_OFFSET
+    add    x1, x1, #MIRROR_STRING_VALUE_OFFSET
 
     /*
-     * Now:           CharArray*    Offset   Count
-     *    first arg      x2          w6        w4
-     *   second arg      x1          w5        w3
+     * Now:           Data*  Count
+     *    first arg    x2      w4
+     *   second arg    x1      w3
      */
 
     // x0 := str1.length(w4) - str2.length(w3). ldr zero-extended w3/w4 into x3/x4.
@@ -1847,16 +1853,6 @@
     // Min(count1, count2) into w3.
     csel x3, x3, x4, ge
 
-    // Build pointer into string data.
-
-    // Add offset in array (substr etc.) (sign extend and << 1).
-    add x2, x2, w6, sxtw #1
-    add x1, x1, w5, sxtw #1
-
-    // Add offset in CharArray to array.
-    add x2, x2, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-    add x1, x1, #MIRROR_CHAR_ARRAY_DATA_OFFSET
-
     // TODO: Tune this value.
     // Check for long string, do memcmp16 for them.
     cmp w3, #28  // Constant from arm32.
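A purely illustrative address-computation sketch of the layout change behind these entrypoint edits (offsets are symbolic placeholders, not the real mirror::String constants): the old code followed a char[] reference plus a stored offset, while the new code simply biases the String pointer by the value offset because the character data is now embedded in the String object.

#include <cstdint>

uintptr_t OldCharDataSketch(uintptr_t char_array, uint32_t offset,
                            uintptr_t kCharArrayDataOffset) {
  // char[] base + array header + (offset << 1), since chars are 16-bit.
  return char_array + kCharArrayDataOffset + (static_cast<uintptr_t>(offset) << 1);
}

uintptr_t NewCharDataSketch(uintptr_t string_obj, uintptr_t kStringValueOffset) {
  // Data sits directly inside the String object.
  return string_obj + kStringValueOffset;
}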
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 356a145..ee5c59f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -982,6 +982,7 @@
     RETURN_IF_ZERO
 END art_quick_set_obj_instance
 
+// Macro to facilitate adding new allocation entrypoints.
 .macro ONE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -992,7 +993,6 @@
 END \name
 .endm
 
-// Macro to facilitate adding new allocation entrypoints.
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1013,6 +1013,16 @@
 END \name
 .endm
 
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC
+    jal     \entrypoint
+    sw      rSELF, 16($sp)            # pass Thread::Current
+    \return
+END \name
+.endm
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index f867aa8..d781e76 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1265,6 +1265,16 @@
 END \name
 .endm
 
+.macro FOUR_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    jal     \entrypoint
+    move    $a4, rSELF                 # pass Thread::Current
+    \return
+END \name
+.endm
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 037c26e..fe04bf5 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -35,6 +35,12 @@
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array\c_suffix, artCheckAndAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check\c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Called by managed code to allocate a string from bytes.
+FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes\c_suffix, artAllocStringFromBytesFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Called by managed code to allocate a string from chars.
+THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars\c_suffix, artAllocStringFromCharsFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+// Called by managed code to allocate a string from a string.
+ONE_ARG_DOWNCALL art_quick_alloc_string_from_string\c_suffix, artAllocStringFromStringFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 .endm
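The macro choice above encodes only the arity of each new string-allocation entrypoint: four managed arguments plus Thread* for the bytes variant, three for the chars variant, one for the string variant. The declarations below are an arity sketch with invented parameter names and types, not the real artAlloc*FromCode signatures.

#include <cstdint>

struct ThreadSketch;         // opaque stand-in for Thread
struct ManagedStringSketch;  // opaque stand-in for the returned string

// FOUR_ARG_DOWNCALL shape: four managed arguments, then Thread*.
ManagedStringSketch* AllocStringFromBytesSketch(void* byte_data, int32_t a, int32_t b,
                                                int32_t c, ThreadSketch* self);
// THREE_ARG_DOWNCALL shape: three managed arguments, then Thread*.
ManagedStringSketch* AllocStringFromCharsSketch(void* char_data, int32_t a, int32_t b,
                                                ThreadSketch* self);
// ONE_ARG_DOWNCALL shape: one managed argument, then Thread*.
ManagedStringSketch* AllocStringFromStringSketch(void* source_string, ThreadSketch* self);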
 
 .macro GENERATE_ALL_ALLOC_ENTRYPOINTS
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0d9a888..de7804f 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1229,32 +1229,15 @@
       "aacaacaacaacaacaacaacaacaacaacaacaac",     // This one's over.
       "aacaacaacaacaacaacaacaacaacaacaacaaca" };  // As is this one. We need a separate one to
                                                   // defeat object-equal optimizations.
-  static constexpr size_t kBaseStringCount  = arraysize(c);
-  static constexpr size_t kStringCount = 2 * kBaseStringCount;
+  static constexpr size_t kStringCount = arraysize(c);
 
   StackHandleScope<kStringCount> hs(self);
   Handle<mirror::String> s[kStringCount];
 
-  for (size_t i = 0; i < kBaseStringCount; ++i) {
+  for (size_t i = 0; i < kStringCount; ++i) {
     s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c[i]));
   }
 
-  RandGen r(0x1234);
-
-  for (size_t i = kBaseStringCount; i < kStringCount; ++i) {
-    s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c[i - kBaseStringCount]));
-    int32_t length = s[i]->GetLength();
-    if (length > 1) {
-      // Set a random offset and length.
-      int32_t new_offset = 1 + (r.next() % (length - 1));
-      int32_t rest = length - new_offset - 1;
-      int32_t new_length = 1 + (rest > 0 ? r.next() % rest : 0);
-
-      s[i]->SetField32<false>(mirror::String::CountOffset(), new_length);
-      s[i]->SetField32<false>(mirror::String::OffsetOffset(), new_offset);
-    }
-  }
-
   // TODO: wide characters
 
   // Matrix of expectations. First component is first parameter. Note we only check against the
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 55e3dff..6ebeba3 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -685,6 +685,26 @@
     END_FUNCTION RAW_VAR(c_name, 0)
 END_MACRO
 
+MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION RAW_VAR(c_name, 0)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    // Outgoing argument set up
+    subl MACRO_LITERAL(12), %esp  // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ebx                      // pass arg4
+    PUSH edx                      // pass arg3
+    PUSH ecx                      // pass arg2
+    PUSH eax                      // pass arg1
+    call VAR(cxx_name, 1)         // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    addl MACRO_LITERAL(32), %esp  // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    CALL_MACRO(return_macro, 2)   // return or deliver exception
+    END_FUNCTION RAW_VAR(c_name, 0)
+END_MACRO
+
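The 32-byte pop above has to undo everything pushed after the callee-save frame setup: 12 bytes of alignment padding, 4 bytes for the pushed Thread*, and 4 x 4 bytes for the four pushed argument registers. A trivial compile-time check of that arithmetic (illustrative only):

constexpr int kAlignmentPadding = 12;
constexpr int kThreadPointer = 4;
constexpr int kFourArguments = 4 * 4;
static_assert(kAlignmentPadding + kThreadPointer + kFourArguments == 32,
              "the addl $32, %esp must match the padding, Thread*, and four pushed args");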
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION RAW_VAR(c_name, 0)
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx // save ref containing registers for GC
@@ -789,6 +809,12 @@
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
+  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
+  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
@@ -799,6 +825,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
@@ -809,6 +838,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
@@ -819,6 +851,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
@@ -829,6 +864,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
@@ -839,6 +877,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
@@ -849,6 +890,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
@@ -859,6 +903,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
@@ -869,6 +916,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
@@ -879,6 +929,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
@@ -889,6 +942,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
@@ -899,6 +955,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
@@ -909,6 +968,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
@@ -1567,13 +1629,8 @@
     PUSH edi                    // push callee save reg
     mov MIRROR_STRING_COUNT_OFFSET(%eax), %edx
     mov MIRROR_STRING_COUNT_OFFSET(%ecx), %ebx
-    mov MIRROR_STRING_VALUE_OFFSET(%eax), %esi
-    mov MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
-    mov MIRROR_STRING_OFFSET_OFFSET(%eax), %eax
-    mov MIRROR_STRING_OFFSET_OFFSET(%ecx), %ecx
-    /* Build pointers to the start of string data */
-    lea  MIRROR_CHAR_ARRAY_DATA_OFFSET(%esi, %eax, 2), %esi
-    lea  MIRROR_CHAR_ARRAY_DATA_OFFSET(%edi, %ecx, 2), %edi
+    lea MIRROR_STRING_VALUE_OFFSET(%eax), %esi
+    lea MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
     /* Calculate min length and count diff */
     mov   %edx, %ecx
     mov   %edx, %eax
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 570624c..da4d92b 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -738,6 +738,17 @@
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
+MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
+    call VAR(cxx_name, 1)               // cxx_name(arg1, arg2, arg3, arg4, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)         // return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
     movl 8(%rsp), %esi                  // pass referrer
@@ -822,6 +833,12 @@
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 #define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
+  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
+  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
@@ -832,6 +849,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
@@ -842,6 +862,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
@@ -852,6 +875,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
@@ -862,6 +888,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
@@ -872,6 +901,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
@@ -882,6 +914,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
@@ -929,6 +964,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
@@ -939,6 +977,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
@@ -949,6 +990,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
@@ -959,6 +1003,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
@@ -969,6 +1016,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
@@ -979,6 +1029,9 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
@@ -1622,13 +1675,9 @@
 DEFINE_FUNCTION art_quick_string_compareto
     movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
     movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
-    movl MIRROR_STRING_VALUE_OFFSET(%edi), %r10d
-    movl MIRROR_STRING_VALUE_OFFSET(%esi), %r11d
-    movl MIRROR_STRING_OFFSET_OFFSET(%edi), %eax
-    movl MIRROR_STRING_OFFSET_OFFSET(%esi), %ecx
     /* Build pointers to the start of string data */
-    leal MIRROR_CHAR_ARRAY_DATA_OFFSET(%r10d, %eax, 2), %esi
-    leal MIRROR_CHAR_ARRAY_DATA_OFFSET(%r11d, %ecx, 2), %edi
+    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
+    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
     /* Calculate min length and count diff */
     movl  %r8d, %ecx
     movl  %r8d, %eax
@@ -1638,8 +1687,8 @@
      * At this point we have:
      *   eax: value to return if first part of strings are equal
      *   ecx: minimum among the lengths of the two strings
-     *   esi: pointer to this string data
-     *   edi: pointer to comp string data
+     *   esi: pointer to comp string data
+     *   edi: pointer to this string data
      */
     jecxz .Lkeep_length
     repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
@@ -1648,8 +1697,8 @@
     ret
     .balign 16
 .Lnot_equal:
-    movzwl  -2(%esi), %eax        // get last compared char from this string
-    movzwl  -2(%edi), %ecx        // get last compared char from comp string
+    movzwl  -2(%edi), %eax        // get last compared char from this string
+    movzwl  -2(%esi), %ecx        // get last compared char from comp string
     subl  %ecx, %eax              // return the difference
     ret
 END_FUNCTION art_quick_string_compareto
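Note on the stub above: with the character data now embedded in the String object at MIRROR_STRING_VALUE_OFFSET, there is no separate char[] reference or offset field to load, so a single leal per operand yields the data pointer. As an illustrative sketch only (not the ART stub itself), the fast path implements this comparison:

#include <algorithm>
#include <cstdint>

// Compare up to the shorter length; the first mismatching char decides the
// ordering (the movzwl/subl tail), otherwise the length difference does.
int32_t StringCompareTo(const uint16_t* this_data, int32_t this_len,
                        const uint16_t* comp_data, int32_t comp_len) {
  int32_t min_len = std::min(this_len, comp_len);
  for (int32_t i = 0; i < min_len; ++i) {
    if (this_data[i] != comp_data[i]) {
      return static_cast<int32_t>(this_data[i]) - static_cast<int32_t>(comp_data[i]);
    }
  }
  return this_len - comp_len;  // Equal prefixes: compare by length.
}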
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 8057dd1..a115fbe 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -108,7 +108,7 @@
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 126 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 145 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
@@ -124,7 +124,7 @@
 #define MIRROR_OBJECT_LOCK_WORD_OFFSET 4
 ADD_TEST_EQ(MIRROR_OBJECT_LOCK_WORD_OFFSET, art::mirror::Object::MonitorOffset().Int32Value())
 
-#if defined(USE_BAKER_OR_BROOKS_READ_BARRIER)
+#if defined(USE_BROOKS_READ_BARRIER)
 #define MIRROR_OBJECT_HEADER_SIZE 16
 #else
 #define MIRROR_OBJECT_HEADER_SIZE 8
@@ -170,14 +170,11 @@
             sizeof(art::mirror::HeapReference<art::mirror::Object>))
 
 // Offsets within java.lang.String.
-#define MIRROR_STRING_VALUE_OFFSET  MIRROR_OBJECT_HEADER_SIZE
-ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
-
-#define MIRROR_STRING_COUNT_OFFSET  (4 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_STRING_COUNT_OFFSET  MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_STRING_COUNT_OFFSET, art::mirror::String::CountOffset().Int32Value())
 
-#define MIRROR_STRING_OFFSET_OFFSET (12 + MIRROR_OBJECT_HEADER_SIZE)
-ADD_TEST_EQ(MIRROR_STRING_OFFSET_OFFSET, art::mirror::String::OffsetOffset().Int32Value())
+#define MIRROR_STRING_VALUE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
 
 // Offsets within java.lang.reflect.ArtMethod.
 #define MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
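Behind these constants: count_ now sits immediately after the object header, the separate offset field is gone, and the character data starts 8 bytes after the header (past count_ and hash_code_). A hedged sketch of the kind of compile-time check ADD_TEST_EQ performs, using a simplified stand-in struct rather than the real art::mirror::String:

#include <cstddef>
#include <cstdint>

struct FakeHeader { uint32_t klass; uint32_t monitor; };  // 8-byte object header
struct FakeString {
  FakeHeader header;
  int32_t count;       // MIRROR_STRING_COUNT_OFFSET == sizeof(header)
  uint32_t hash_code;
  uint16_t value[1];   // stand-in for the inline character data
};

static_assert(offsetof(FakeString, count) == sizeof(FakeHeader),
              "count must follow the object header");
static_assert(offsetof(FakeString, value) == sizeof(FakeHeader) + 8,
              "value must start 8 bytes after the header");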
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 8911891..b099088 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -345,8 +345,8 @@
   Handle<mirror::Class> java_lang_String(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(), mirror::String::ClassSize())));
   mirror::String::SetClass(java_lang_String.Get());
-  java_lang_String->SetObjectSize(mirror::String::InstanceSize());
   mirror::Class::SetStatus(java_lang_String, mirror::Class::kStatusResolved, self);
+  java_lang_String->SetStringClass();
 
   // Setup java.lang.ref.Reference.
   Handle<mirror::Class> java_lang_ref_Reference(hs.NewHandle(
@@ -474,7 +474,6 @@
     String_class->DumpClass(os2, mirror::Class::kDumpClassFullDetail);
     LOG(FATAL) << os1.str() << "\n\n" << os2.str();
   }
-  CHECK_EQ(java_lang_String->GetObjectSize(), mirror::String::InstanceSize());
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
   CHECK_EQ(java_lang_DexCache.Get(), FindSystemClass(self, "Ljava/lang/DexCache;"));
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
@@ -1752,6 +1751,13 @@
 
   SetupClass(dex_file, dex_class_def, klass, class_loader.Get());
 
+  // Mark the string class by setting its access flag.
+  if (UNLIKELY(!init_done_)) {
+    if (strcmp(descriptor, "Ljava/lang/String;") == 0) {
+      klass->SetStringClass();
+    }
+  }
+
   ObjectLock<mirror::Class> lock(self, klass);
   klass->SetClinitThreadId(self->GetTid());
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 7bee98f..d155941 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -394,8 +394,9 @@
 
     bool error = false;
 
-    // Art method have a different size due to the padding field.
-    if (!klass->IsArtMethodClass() && !klass->IsClassClass() && !is_static) {
+    // Methods and classes have a different size due to padding field. Strings are variable length.
+    if (!klass->IsArtMethodClass() && !klass->IsClassClass() && !klass->IsStringClass() &&
+        !is_static) {
       // Currently only required for AccessibleObject since of the padding fields. The class linker
       // says AccessibleObject is 9 bytes but sizeof(AccessibleObject) is 12 bytes due to padding.
       // The RoundUp is to get around this case.
@@ -479,7 +480,7 @@
   ObjectOffsets() : CheckOffsets<mirror::Object>(false, "Ljava/lang/Object;") {
     addOffset(OFFSETOF_MEMBER(mirror::Object, klass_), "shadow$_klass_");
     addOffset(OFFSETOF_MEMBER(mirror::Object, monitor_), "shadow$_monitor_");
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+#ifdef USE_BROOKS_READ_BARRIER
     addOffset(OFFSETOF_MEMBER(mirror::Object, x_rb_ptr_), "shadow$_x_rb_ptr_");
     addOffset(OFFSETOF_MEMBER(mirror::Object, x_xpadding_), "shadow$_x_xpadding_");
 #endif
@@ -538,8 +539,6 @@
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
     addOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hashCode");
-    addOffset(OFFSETOF_MEMBER(mirror::String, offset_), "offset");
-    addOffset(OFFSETOF_MEMBER(mirror::String, array_), "value");
   };
 };
 
@@ -736,14 +735,14 @@
   EXPECT_FALSE(JavaLangObject->IsSynthetic());
   EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
   EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
-  if (!kUseBakerOrBrooksReadBarrier) {
+  if (!kUseBrooksReadBarrier) {
     EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
   } else {
     EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
   }
   EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
   EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
-  if (kUseBakerOrBrooksReadBarrier) {
+  if (kUseBrooksReadBarrier) {
     EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
     EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
   }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index f3ce552..dc1b4f1 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4078,7 +4078,7 @@
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
     size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0;
-    const jchar* chars = (name.Get() != nullptr) ? name->GetCharArray()->GetData() : nullptr;
+    const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr;
 
     std::vector<uint8_t> bytes;
     JDWP::Append4BE(bytes, t->GetThreadId());
@@ -4774,7 +4774,7 @@
     for (const std::string& str : table_) {
       const char* s = str.c_str();
       size_t s_len = CountModifiedUtf8Chars(s);
-      std::unique_ptr<uint16_t> s_utf16(new uint16_t[s_len]);
+      std::unique_ptr<uint16_t[]> s_utf16(new uint16_t[s_len]);
       ConvertModifiedUtf8ToUtf16(s_utf16.get(), s);
       JDWP::AppendUtf16BE(bytes, s_utf16.get(), s_len);
     }
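The unique_ptr change above is a correctness fix rather than a cleanup: the array form is required for memory allocated with new[]. A minimal illustrative sketch of the difference:

#include <cstddef>
#include <cstdint>
#include <memory>

void Sketch(size_t n) {
  // std::unique_ptr<uint16_t> p(new uint16_t[n]);   // would free with scalar delete: UB
  std::unique_ptr<uint16_t[]> p(new uint16_t[n]);    // freed with delete[]
  p[0] = 0;                                          // operator[] comes with the array form
}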
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index c049e3d..fa129af 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -153,6 +153,32 @@
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
   } \
+} \
+extern "C" mirror::String* artAllocStringFromBytesFromCode##suffix##suffix2( \
+    mirror::ByteArray* byte_array, int32_t high, int32_t offset, int32_t byte_count, \
+    Thread* self) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  ScopedQuickEntrypointChecks sqec(self); \
+  StackHandleScope<1> hs(self); \
+  Handle<mirror::ByteArray> handle_array(hs.NewHandle(byte_array)); \
+  return mirror::String::AllocFromByteArray<instrumented_bool>(self, byte_count, handle_array, \
+                                                               offset, high, allocator_type); \
+} \
+extern "C" mirror::String* artAllocStringFromCharsFromCode##suffix##suffix2( \
+    int32_t offset, int32_t char_count, mirror::CharArray* char_array, Thread* self) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  StackHandleScope<1> hs(self); \
+  Handle<mirror::CharArray> handle_array(hs.NewHandle(char_array)); \
+  return mirror::String::AllocFromCharArray<instrumented_bool>(self, char_count, handle_array, \
+                                                               offset, allocator_type); \
+} \
+extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( \
+    mirror::String* string, Thread* self) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  StackHandleScope<1> hs(self); \
+  Handle<mirror::String> handle_string(hs.NewHandle(string)); \
+  return mirror::String::AllocFromString<instrumented_bool>(self, handle_string->GetLength(), \
+                                                            handle_string, 0, allocator_type); \
 }
 
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(suffix, allocator_type) \
@@ -176,6 +202,9 @@
 extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, mirror::ArtMethod* ref); \
+extern "C" void* art_quick_alloc_string_from_bytes##suffix(void*, int32_t, int32_t, int32_t); \
+extern "C" void* art_quick_alloc_string_from_chars##suffix(int32_t, int32_t, void*); \
+extern "C" void* art_quick_alloc_string_from_string##suffix(void*); \
 extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
@@ -185,6 +214,9 @@
 extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, mirror::ArtMethod* ref); \
+extern "C" void* art_quick_alloc_string_from_bytes##suffix##_instrumented(void*, int32_t, int32_t, int32_t); \
+extern "C" void* art_quick_alloc_string_from_chars##suffix##_instrumented(int32_t, int32_t, void*); \
+extern "C" void* art_quick_alloc_string_from_string##suffix##_instrumented(void*); \
 void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \
   if (instrumented) { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
@@ -196,6 +228,9 @@
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
+    qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix##_instrumented; \
+    qpoints->pAllocStringFromChars = art_quick_alloc_string_from_chars##suffix##_instrumented; \
+    qpoints->pAllocStringFromString = art_quick_alloc_string_from_string##suffix##_instrumented; \
   } else { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix; \
     qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \
@@ -206,6 +241,9 @@
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
+    qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix; \
+    qpoints->pAllocStringFromChars = art_quick_alloc_string_from_chars##suffix; \
+    qpoints->pAllocStringFromString = art_quick_alloc_string_from_string##suffix; \
   } \
 }
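A note on the pattern used here: each GENERATE_* invocation stamps out one extern "C" entrypoint per allocator, in plain and _instrumented flavours, and SetQuickAllocEntryPoints* swaps the whole group of function pointers at once. A toy sketch of that structure (the names and macro are illustrative, not the real ones):

#include <cstdio>

struct ToyEntryPoints { void (*pAllocStringFromString)(); };

#define TOY_GENERATE(suffix)                                              \
  void AllocStringFromString##suffix() { std::puts("plain " #suffix); }   \
  void AllocStringFromString##suffix##Instrumented() {                    \
    std::puts("instrumented " #suffix);                                   \
  }                                                                       \
  void SetToyEntryPoints##suffix(ToyEntryPoints* qp, bool instrumented) { \
    qp->pAllocStringFromString = instrumented                             \
        ? AllocStringFromString##suffix##Instrumented                     \
        : AllocStringFromString##suffix;                                  \
  }

TOY_GENERATE(_rosalloc)
TOY_GENERATE(_tlab)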
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 6d9e483..035f57a 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -29,6 +29,9 @@
   V(AllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*) \
   V(CheckAndAllocArray, void*, uint32_t, int32_t, mirror::ArtMethod*) \
   V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, int32_t, mirror::ArtMethod*) \
+  V(AllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t) \
+  V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
+  V(AllocStringFromString, void*, void*) \
 \
   V(InstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*) \
   V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
@@ -123,8 +126,24 @@
   V(Deoptimize, void, void) \
 \
   V(A64Load, int64_t, volatile const int64_t *) \
-  V(A64Store, void, volatile int64_t *, int64_t)
-
+  V(A64Store, void, volatile int64_t *, int64_t) \
+\
+  V(NewEmptyString, void) \
+  V(NewStringFromBytes_B, void) \
+  V(NewStringFromBytes_BI, void) \
+  V(NewStringFromBytes_BII, void) \
+  V(NewStringFromBytes_BIII, void) \
+  V(NewStringFromBytes_BIIString, void) \
+  V(NewStringFromBytes_BString, void) \
+  V(NewStringFromBytes_BIICharset, void) \
+  V(NewStringFromBytes_BCharset, void) \
+  V(NewStringFromChars_C, void) \
+  V(NewStringFromChars_CII, void) \
+  V(NewStringFromChars_IIC, void) \
+  V(NewStringFromCodePoints, void) \
+  V(NewStringFromString, void) \
+  V(NewStringFromStringBuffer, void) \
+  V(NewStringFromStringBuilder, void)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
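The list above is an X-macro: every V(name, ...) row is expanded several times with different definitions of V (to declare the QuickEntryPoints fields, emit offsets, and so on), which is why adding the string entrypoints here is enough to thread them through the runtime. A toy illustration of the pattern (the list below is not the real QUICK_ENTRYPOINT_LIST):

#define TOY_ENTRYPOINT_LIST(V) \
  V(AllocStringFromBytes)      \
  V(AllocStringFromChars)      \
  V(AllocStringFromString)

// Expansion 1: one pointer-sized slot per entrypoint.
struct ToyQuickEntryPoints {
#define TOY_FIELD(name) void* p##name;
  TOY_ENTRYPOINT_LIST(TOY_FIELD)
#undef TOY_FIELD
};

// Expansion 2: the matching names, e.g. for diagnostics.
static const char* kToyEntrypointNames[] = {
#define TOY_NAME(name) #name,
  TOY_ENTRYPOINT_LIST(TOY_NAME)
#undef TOY_NAME
};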
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0664fa0..1fb45f4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -167,7 +167,13 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArray, pCheckAndAllocArrayWithAccessCheck,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArrayWithAccessCheck,
-                         pInstanceofNonTrivial, sizeof(void*));
+                         pAllocStringFromBytes, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromBytes, pAllocStringFromChars,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromChars, pAllocStringFromString,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromString, pInstanceofNonTrivial,
+                         sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckCast, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckCast, pInitializeStaticStorage, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeStaticStorage, pInitializeTypeAndVerifyAccess,
@@ -269,7 +275,38 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeoptimize, pA64Load, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Load, pA64Store, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pA64Store)
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Store, pNewEmptyString, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewEmptyString, pNewStringFromBytes_B, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_B, pNewStringFromBytes_BI,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BI, pNewStringFromBytes_BII,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BII, pNewStringFromBytes_BIII,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BIII, pNewStringFromBytes_BIIString,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BIIString,
+                         pNewStringFromBytes_BString, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BString,
+                         pNewStringFromBytes_BIICharset, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BIICharset,
+                         pNewStringFromBytes_BCharset, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromBytes_BCharset,
+                         pNewStringFromChars_C, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromChars_C, pNewStringFromChars_CII,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromChars_CII, pNewStringFromChars_IIC,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromChars_IIC, pNewStringFromCodePoints,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromCodePoints, pNewStringFromString,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromString, pNewStringFromStringBuffer,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuffer, pNewStringFromStringBuilder,
+                         sizeof(void*));
+
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pNewStringFromStringBuilder)
             + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
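These EXPECT_OFFSET_DIFFNP checks pin every new slot exactly one pointer after its predecessor, because assembly code addresses the entrypoints by hard-coded offsets. A hedged equivalent using offsetof on a stand-in struct:

#include <cstddef>

struct ToyEntryPointsLayout {
  void* pCheckAndAllocArrayWithAccessCheck;
  void* pAllocStringFromBytes;
  void* pAllocStringFromChars;
  void* pAllocStringFromString;
};

static_assert(offsetof(ToyEntryPointsLayout, pAllocStringFromChars) ==
                  offsetof(ToyEntryPointsLayout, pAllocStringFromBytes) + sizeof(void*),
              "string entrypoints must be contiguous, pointer-sized slots");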
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 85234dc..49c7fda 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1042,10 +1042,11 @@
 
 inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
   const size_t kBitsPerVec = 32;
-  DCHECK_GE(num_slots * kBitsPerVec, num_vec);
+  DCHECK_GE(num_vec * kBitsPerVec, num_slots);
+  DCHECK_NE(num_vec, 0U);
   size_t remain = num_vec * kBitsPerVec - num_slots;
-  DCHECK_NE(remain, kBitsPerVec);
-  return ((1U << remain) - 1) << (kBitsPerVec - remain);
+  DCHECK_LT(remain, kBitsPerVec);
+  return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
 }
 
 inline bool RosAlloc::Run::IsAllFree() {
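The rewritten DCHECKs state the actual invariant (num_vec 32-bit vectors must cover num_slots slots), and masking the shift count with 0x1F closes an undefined-behaviour hole: when the slots exactly fill the last vector, remain is 0 and the old code shifted by 32. A worked example under those assumptions:

#include <cstddef>
#include <cstdint>
#include <cstdio>

uint32_t LastVectorMask(size_t num_slots, size_t num_vec) {
  const size_t kBitsPerVec = 32;
  size_t remain = num_vec * kBitsPerVec - num_slots;  // unused bits in the last vector
  return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
}

int main() {
  std::printf("%08x\n", LastVectorMask(30, 1));  // c0000000: top two (unused) bits set
  std::printf("%08x\n", LastVectorMask(32, 1));  // 00000000: shift of 32 now masked to 0
  return 0;
}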
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index eabb1c2..26f349a 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -718,6 +718,7 @@
       // Leave References gray so that GetReferent() will trigger RB.
       CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
     } else {
+#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
       if (kUseBakerReadBarrier) {
         if (region_space_->IsInToSpace(to_ref)) {
           // If to-space, change from gray to white.
@@ -739,6 +740,9 @@
           CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
         }
       }
+#else
+      DCHECK(!kUseBakerReadBarrier);
+#endif
     }
     if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
       ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
@@ -815,7 +819,7 @@
     DCHECK(obj != nullptr);
     DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj;
     DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj;
-    obj->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+    obj->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
     DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
   }
 
@@ -963,7 +967,8 @@
     if (kUseBakerReadBarrier) {
       DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref;
       // Clear the black ptr.
-      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::BlackPtr(), ReadBarrier::WhitePtr());
+      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << ref;
     }
     size_t obj_size = ref->SizeOf();
     size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment);
@@ -1330,10 +1335,6 @@
   while (true) {
     // Copy the object. TODO: copy only the lockword in the second iteration and on?
     memcpy(to_ref, from_ref, obj_size);
-    // Set the gray ptr.
-    if (kUseBakerReadBarrier) {
-      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
-    }
 
     LockWord old_lock_word = to_ref->GetLockWord(false);
 
@@ -1378,6 +1379,11 @@
       return to_ref;
     }
 
+    // Set the gray ptr.
+    if (kUseBakerReadBarrier) {
+      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+    }
+
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
 
     // Try to atomically write the fwd ptr.
@@ -1484,6 +1490,21 @@
   }
   DCHECK(from_ref != nullptr);
   DCHECK(heap_->collector_type_ == kCollectorTypeCC);
+  if (kUseBakerReadBarrier && !is_active_) {
+    // In the lock word forward address state, the read barrier bits
+    // in the lock word are part of the stored forwarding address and
+    // invalid. This is usually OK as the from-space copy of objects
+    // aren't accessed by mutators due to the to-space
+    // invariant. However, during the dex2oat image writing relocation
+    // and the zygote compaction, objects can be in the forward
+    // address state (to store the forward/relocation addresses) and
+    // they can still be accessed and the invalid read barrier bits
+    // are consulted. If they look like gray but aren't really, the
+    // read barriers slow path can trigger when it shouldn't. To guard
+    // against this, return here if the CC collector isn't running.
+    return from_ref;
+  }
+  DCHECK(region_space_ != nullptr) << "Read barrier slow path taken when CC isn't running?";
   space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref);
   if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) {
     // It's already marked.
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index f0e8d14..1068e90 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -381,9 +381,11 @@
     if (UNLIKELY(obj == nullptr || !IsAligned<kPageSize>(obj) ||
                  (kIsDebugBuild && large_object_space != nullptr &&
                      !large_object_space->Contains(obj)))) {
-      LOG(ERROR) << "Tried to mark " << obj << " not contained by any spaces";
-      LOG(ERROR) << "Attempting see if it's a bad root";
+      LOG(INTERNAL_FATAL) << "Tried to mark " << obj << " not contained by any spaces";
+      LOG(INTERNAL_FATAL) << "Attempting to see if it's a bad root";
       mark_sweep_->VerifyRoots();
+      PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
+      MemMap::DumpMaps(LOG(INTERNAL_FATAL));
       LOG(FATAL) << "Can't mark invalid object";
     }
   }
@@ -498,7 +500,7 @@
     if (heap->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
       space::LargeObjectSpace* large_object_space = heap->GetLargeObjectsSpace();
       if (large_object_space != nullptr && !large_object_space->Contains(root)) {
-        LOG(ERROR) << "Found invalid root: " << root << " " << info;
+        LOG(INTERNAL_FATAL) << "Found invalid root: " << root << " " << info;
       }
     }
   }

diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 922a71c..7b19dc9 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -60,10 +60,6 @@
   if (obj == nullptr) {
     return;
   }
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct forward pointer installed.
-    obj->AssertReadBarrierPointer();
-  }
   if (from_space_->HasAddress(obj)) {
     mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
     // If the object has already been moved, return the new forward address.
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 4c93a4c..4ba3983 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -96,11 +96,11 @@
         << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
     if (heap->ConcurrentCopyingCollector()->RegionSpace()->IsInToSpace(ref)) {
       // Moving objects.
-      ref->SetReadBarrierPointer(ReadBarrier::WhitePtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
       CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
     } else {
       // Non-moving objects.
-      ref->SetReadBarrierPointer(ReadBarrier::BlackPtr());
+      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
       CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
     }
   }
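Switching from SetReadBarrierPointer to AtomicSetReadBarrierPointer matters once the Baker state shares the lock word: the colour change has to be a compare-and-swap so a concurrent lock-word update is not lost and a stale colour never overwrites a newer one. A hedged sketch of that retry loop, with illustrative constants rather than ART's LockWord layout:

#include <atomic>
#include <cstdint>

constexpr uint32_t kColourMask = 0x3;  // assumption: 2 low bits hold the colour

bool AtomicSetColour(std::atomic<uint32_t>& lock_word,
                     uint32_t expected_colour, uint32_t new_colour) {
  uint32_t cur = lock_word.load(std::memory_order_relaxed);
  while ((cur & kColourMask) == expected_colour) {
    uint32_t desired = (cur & ~kColourMask) | new_colour;  // swap only the colour bits
    if (lock_word.compare_exchange_weak(cur, desired)) {
      return true;  // colour transition succeeded
    }
    // CAS failure reloads cur; the loop re-checks the colour before retrying.
  }
  return false;  // another thread changed the colour first
}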
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index fb7ff54..d0eb083 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -981,7 +981,7 @@
     // ClassObjects have their static fields appended, so aren't all the same size.
     // But they're at least this size.
     __ AddU4(sizeof(mirror::Class));  // instance size
-  } else if (klass->IsArrayClass() || klass->IsPrimitive()) {
+  } else if (klass->IsArrayClass() || klass->IsStringClass() || klass->IsPrimitive()) {
     __ AddU4(0);
   } else {
     __ AddU4(klass->GetObjectSize());  // instance size
@@ -1036,13 +1036,22 @@
 
   // Instance fields for this class (no superclass fields)
   int iFieldCount = klass->IsObjectClass() ? 0 : klass->NumInstanceFields();
-  __ AddU2((uint16_t)iFieldCount);
+  if (klass->IsStringClass()) {
+    __ AddU2((uint16_t)iFieldCount + 1);
+  } else {
+    __ AddU2((uint16_t)iFieldCount);
+  }
   for (int i = 0; i < iFieldCount; ++i) {
     ArtField* f = klass->GetInstanceField(i);
     __ AddStringId(LookupStringId(f->GetName()));
     HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), nullptr);
     __ AddU1(t);
   }
+  // Add native value character array for strings.
+  if (klass->IsStringClass()) {
+    __ AddStringId(LookupStringId("value"));
+    __ AddU1(hprof_basic_object);
+  }
 }
 
 void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) {
@@ -1099,6 +1108,7 @@
 
   // Write the instance data;  fields for this class, followed by super class fields,
   // and so on. Don't write the klass or monitor fields of Object.class.
+  mirror::Class* orig_klass = klass;
   while (!klass->IsObjectClass()) {
     int ifieldCount = klass->NumInstanceFields();
     for (int i = 0; i < ifieldCount; ++i) {
@@ -1133,8 +1143,24 @@
     klass = klass->GetSuperClass();
   }
 
-  // Patch the instance field length.
-  __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
+  // Output native value character array for strings.
+  if (orig_klass->IsStringClass()) {
+    mirror::String* s = obj->AsString();
+    __ AddObjectId(reinterpret_cast<mirror::Object*>(s->GetValue()));
+
+    // Patch the instance field length.
+    __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
+
+    __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
+    __ AddObjectId(reinterpret_cast<mirror::Object*>(s->GetValue()));
+    __ AddU4(StackTraceSerialNumber(obj));
+    __ AddU4(s->GetLength());
+    __ AddU1(hprof_basic_char);
+    __ AddU2List(s->GetValue(), s->GetLength());
+  } else {
+    // Patch the instance field length.
+    __ UpdateU4(size_patch_offset, output_->Length() - (size_patch_offset + 4));
+  }
 }
 
 void Hprof::VisitRoot(mirror::Object* obj, const RootInfo& info) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 4765ebc..ae67efb 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -21,6 +21,7 @@
 #include "debugger.h"
 #include "mirror/array-inl.h"
 #include "unstarted_runtime.h"
+#include "verifier/method_verifier.h"
 
 namespace art {
 namespace interpreter {
@@ -485,16 +486,29 @@
 template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
+  bool string_init = false;
+  // Replace calls to String.<init> with equivalent StringFactory call.
+  if (called_method->GetDeclaringClass()->IsStringClass() && called_method->IsConstructor()) {
+    ScopedObjectAccessUnchecked soa(self);
+    jmethodID mid = soa.EncodeMethod(called_method);
+    called_method = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid));
+    string_init = true;
+  }
+
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
   const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
   uint16_t num_regs;
   if (LIKELY(code_item != nullptr)) {
     num_regs = code_item->registers_size_;
-    DCHECK_EQ(num_ins, code_item->ins_size_);
+    DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_);
   } else {
     DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
     num_regs = num_ins;
+    if (string_init) {
+      // The new StringFactory call is static and has one fewer argument.
+      num_regs--;
+    }
   }
 
   // Allocate shadow frame on the stack.
@@ -504,7 +518,7 @@
                                                     memory));
 
   // Initialize new shadow frame.
-  const size_t first_dest_reg = num_regs - num_ins;
+  size_t first_dest_reg = num_regs - num_ins;
   if (do_assignability_check) {
     // Slow path.
     // We might need to do class loading, which incurs a thread state change to kNative. So
@@ -536,6 +550,10 @@
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
       ++arg_offset;
+    } else if (string_init) {
+      // Skip the referrer for the new static StringFactory call.
+      ++dest_reg;
+      ++arg_offset;
     }
     for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
       DCHECK_LT(shorty_pos + 1, shorty_len);
@@ -583,7 +601,12 @@
   } else {
     // Fast path: no extra checks.
     if (is_range) {
-      const uint16_t first_src_reg = inst->VRegC_3rc();
+      uint16_t first_src_reg = inst->VRegC_3rc();
+      if (string_init) {
+        // Skip the referrer for the new static StringFactory call.
+        ++first_src_reg;
+        ++first_dest_reg;
+      }
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
           ++dest_reg, ++src_reg) {
         AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
@@ -592,12 +615,18 @@
       DCHECK_LE(num_ins, 5U);
       uint16_t regList = inst->Fetch16(2);
       uint16_t count = num_ins;
+      size_t arg_index = 0;
+      if (string_init) {
+        // Skip the referrer for the new static StringFactory call.
+        regList >>= 4;
+        ++arg_index;
+      }
       if (count == 5) {
         AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U,
                        (inst_data >> 8) & 0x0f);
         --count;
        }
-      for (size_t arg_index = 0; arg_index < count; ++arg_index, regList >>= 4) {
+      for (; arg_index < count; ++arg_index, regList >>= 4) {
         AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
       }
     }
@@ -631,6 +660,38 @@
   } else {
     UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
+
+  if (string_init && !self->IsExceptionPending()) {
+    // Set the new string result of the StringFactory.
+    uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+    shadow_frame.SetVRegReference(vregC, result->GetL());
+    // Overwrite all potential copies of the original result of the new-instance of string with the
+    // new result of the StringFactory. Use the verifier to find this set of registers.
+    mirror::ArtMethod* method = shadow_frame.GetMethod();
+    MethodReference method_ref = method->ToMethodReference();
+    SafeMap<uint32_t, std::set<uint32_t>> string_init_map;
+    SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr;
+    MethodRefToStringInitRegMap& method_to_string_init_map = Runtime::Current()->GetStringInitMap();
+    auto it = method_to_string_init_map.find(method_ref);
+    if (it == method_to_string_init_map.end()) {
+      string_init_map = std::move(verifier::MethodVerifier::FindStringInitMap(method));
+      method_to_string_init_map.Overwrite(method_ref, string_init_map);
+      string_init_map_ptr = &string_init_map;
+    } else {
+      string_init_map_ptr = &it->second;
+    }
+    if (string_init_map_ptr->size() != 0) {
+      uint32_t dex_pc = shadow_frame.GetDexPC();
+      auto map_it = string_init_map_ptr->find(dex_pc);
+      if (map_it != string_init_map_ptr->end()) {
+        const std::set<uint32_t>& reg_set = map_it->second;
+        for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+          shadow_frame.SetVRegReference(*set_it, result->GetL());
+        }
+      }
+    }
+  }
+
   return !self->IsExceptionPending();
 }
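Two details of the DoCall rewrite above are worth spelling out. Because the StringFactory replacement is static, every argument shifts down by one: the receiver register is skipped when copying into the new shadow frame, and afterwards the factory result is written back into every register the verifier reports as aliasing the uninitialized string. For the packed invoke/35c form the skip is one nibble of regList, as in this toy illustration with assumed values:

#include <cstdint>
#include <cstdio>

int main() {
  // invoke-direct {v1, v2, v3, v4} String.<init>: vreg indices packed 4 bits each,
  // lowest nibble first, so v1 is the receiver.
  uint16_t regList = 0x4321;
  std::printf("receiver vreg: v%u\n", regList & 0x0f);
  regList >>= 4;  // rewritten static StringFactory call: drop the receiver
  std::printf("first factory argument: v%u\n", regList & 0x0f);
  return 0;
}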
 
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index dbedc16..6acc72e 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -369,7 +369,7 @@
       oss << StringPrintf(" vreg%u=0x%08X", i, raw_value);
       if (ref_value != nullptr) {
         if (ref_value->GetClass()->IsStringClass() &&
-            ref_value->AsString()->GetCharArray() != nullptr) {
+            ref_value->AsString()->GetValue() != nullptr) {
           oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\"";
         } else {
           oss << "/" << PrettyTypeOf(ref_value);
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index dc0b687..878efba 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -526,10 +526,20 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(NEW_INSTANCE) {
-    Runtime* runtime = Runtime::Current();
-    Object* obj = AllocObjectFromCode<do_access_check, true>(
-        inst->VRegB_21c(), shadow_frame.GetMethod(), self,
-        runtime->GetHeap()->GetCurrentAllocator());
+    Object* obj = nullptr;
+    Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                      self, false, do_access_check);
+    if (LIKELY(c != nullptr)) {
+      if (UNLIKELY(c->IsStringClass())) {
+        gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+        mirror::SetStringCountVisitor visitor(0);
+        obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+      } else {
+        obj = AllocObjectFromCode<do_access_check, true>(
+            inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+            Runtime::Current()->GetHeap()->GetCurrentAllocator());
+      }
+    }
     if (UNLIKELY(obj == nullptr)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 82f0009..a5e5299 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -428,10 +428,20 @@
       }
       case Instruction::NEW_INSTANCE: {
         PREAMBLE();
-        Runtime* runtime = Runtime::Current();
-        Object* obj = AllocObjectFromCode<do_access_check, true>(
-            inst->VRegB_21c(), shadow_frame.GetMethod(), self,
-            runtime->GetHeap()->GetCurrentAllocator());
+        Object* obj = nullptr;
+        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
+                                          self, false, do_access_check);
+        if (LIKELY(c != nullptr)) {
+          if (UNLIKELY(c->IsStringClass())) {
+            gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+            mirror::SetStringCountVisitor visitor(0);
+            obj = String::Alloc<true>(self, 0, allocator_type, visitor);
+          } else {
+            obj = AllocObjectFromCode<do_access_check, true>(
+              inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+              Runtime::Current()->GetHeap()->GetCurrentAllocator());
+          }
+        }
         if (UNLIKELY(obj == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index f5a3a6b..fc3826b 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -573,6 +573,12 @@
     if (c == nullptr) {
       return nullptr;
     }
+    if (c->IsStringClass()) {
+      gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+      mirror::SetStringCountVisitor visitor(0);
+      return soa.AddLocalReference<jobject>(mirror::String::Alloc<true>(soa.Self(), 0,
+                                                                        allocator_type, visitor));
+    }
     return soa.AddLocalReference<jobject>(c->AllocObject(soa.Self()));
   }
 
@@ -594,6 +600,11 @@
     if (c == nullptr) {
       return nullptr;
     }
+    if (c->IsStringClass()) {
+      // Replace calls to String.<init> with equivalent StringFactory call.
+      jmethodID sf_mid = WellKnownClasses::StringInitToStringFactoryMethodID(mid);
+      return CallStaticObjectMethodV(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
+    }
     mirror::Object* result = c->AllocObject(soa.Self());
     if (result == nullptr) {
       return nullptr;
@@ -614,6 +625,11 @@
     if (c == nullptr) {
       return nullptr;
     }
+    if (c->IsStringClass()) {
+      // Replace calls to String.<init> with equivalent StringFactory call.
+      jmethodID sf_mid = WellKnownClasses::StringInitToStringFactoryMethodID(mid);
+      return CallStaticObjectMethodA(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
+    }
     mirror::Object* result = c->AllocObject(soa.Self());
     if (result == nullptr) {
       return nullptr;
@@ -1649,7 +1665,7 @@
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      const jchar* chars = s->GetCharArray()->GetData() + s->GetOffset();
+      const jchar* chars = s->GetValue();
       memcpy(buf, chars + start, length * sizeof(jchar));
     }
   }
@@ -1663,7 +1679,7 @@
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
-      const jchar* chars = s->GetCharArray()->GetData() + s->GetOffset();
+      const jchar* chars = s->GetValue();
       ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
     }
   }
@@ -1672,33 +1688,26 @@
     CHECK_NON_NULL_ARGUMENT(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* chars = s->GetCharArray();
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap->IsMovableObject(chars)) {
+    if (heap->IsMovableObject(s)) {
+      jchar* chars = new jchar[s->GetLength()];
+      memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength());
       if (is_copy != nullptr) {
         *is_copy = JNI_TRUE;
       }
-      int32_t char_count = s->GetLength();
-      int32_t offset = s->GetOffset();
-      jchar* bytes = new jchar[char_count];
-      for (int32_t i = 0; i < char_count; i++) {
-        bytes[i] = chars->Get(i + offset);
-      }
-      return bytes;
-    } else {
-      if (is_copy != nullptr) {
-        *is_copy = JNI_FALSE;
-      }
-      return static_cast<jchar*>(chars->GetData() + s->GetOffset());
+      return chars;
     }
+    if (is_copy != nullptr) {
+      *is_copy = JNI_FALSE;
+    }
+    return static_cast<jchar*>(s->GetValue());
   }
 
   static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar* chars) {
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* s_chars = s->GetCharArray();
-    if (chars != (s_chars->GetData() + s->GetOffset())) {
+    if (chars != s->GetValue()) {
       delete[] chars;
     }
   }
@@ -1707,18 +1716,16 @@
     CHECK_NON_NULL_ARGUMENT(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* chars = s->GetCharArray();
-    int32_t offset = s->GetOffset();
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (heap->IsMovableObject(chars)) {
+    if (heap->IsMovableObject(s)) {
       StackHandleScope<1> hs(soa.Self());
-      HandleWrapper<mirror::CharArray> h(hs.NewHandleWrapper(&chars));
+      HandleWrapper<mirror::String> h(hs.NewHandleWrapper(&s));
       heap->IncrementDisableMovingGC(soa.Self());
     }
     if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    return static_cast<jchar*>(chars->GetData() + offset);
+    return static_cast<jchar*>(s->GetValue());
   }
 
   static void ReleaseStringCritical(JNIEnv* env, jstring java_string, const jchar* chars) {
@@ -1727,8 +1734,7 @@
     ScopedObjectAccess soa(env);
     gc::Heap* heap = Runtime::Current()->GetHeap();
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    mirror::CharArray* s_chars = s->GetCharArray();
-    if (heap->IsMovableObject(s_chars)) {
+    if (heap->IsMovableObject(s)) {
       heap->DecrementDisableMovingGC(soa.Self());
     }
   }
@@ -1745,7 +1751,7 @@
     size_t byte_count = s->GetUtfLength();
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
-    const uint16_t* chars = s->GetCharArray()->GetData() + s->GetOffset();
+    const uint16_t* chars = s->GetValue();
     ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
     bytes[byte_count] = '\0';
     return bytes;
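With the characters stored inside the (movable) String object rather than in a separately pinnable char[], GetStringChars now returns a heap copy and sets is_copy whenever the string can move, while GetStringCritical pins by disabling moving GC. Callers that already follow the JNI contract are unaffected; a hedged usage sketch:

#include <jni.h>

void UseChars(JNIEnv* env, jstring s) {
  jboolean is_copy = JNI_FALSE;
  const jchar* chars = env->GetStringChars(s, &is_copy);
  if (chars != nullptr) {
    // ... read chars[0 .. GetStringLength(s)) ...
    env->ReleaseStringChars(s, chars);  // frees the copy, or is a no-op for in-place data
  }
}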
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 77db404..3d14a4e 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -625,8 +625,6 @@
   // ...whose fields haven't been initialized because
   // we didn't call a constructor.
   ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
-  ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "offset", "I")));
-  ASSERT_TRUE(env_->GetObjectField(o, env_->GetFieldID(c, "value", "[C")) == nullptr);
 }
 
 TEST_F(JniInternalTest, GetVersion) {
@@ -860,7 +858,9 @@
   jstring s = reinterpret_cast<jstring>(env_->AllocObject(c));
   ASSERT_NE(s, nullptr);
   env_->CallVoidMethod(s, mid2);
-  ASSERT_EQ(JNI_FALSE, env_->ExceptionCheck());
+  // With the string change, this should now throw an UnsupportedOperationException.
+  ASSERT_EQ(JNI_TRUE, env_->ExceptionCheck());
+  env_->ExceptionClear();
 
   mid = env_->GetMethodID(c, "length", "()I");
   ASSERT_NE(mid, nullptr);
@@ -1538,7 +1538,7 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringChars(s, &is_copy);
-  if (Runtime::Current()->GetHeap()->IsMovableObject(s_m->GetCharArray())) {
+  if (Runtime::Current()->GetHeap()->IsMovableObject(s_m)) {
     EXPECT_EQ(JNI_TRUE, is_copy);
   } else {
     EXPECT_EQ(JNI_FALSE, is_copy);
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index d831bfb..341501b 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -53,6 +53,7 @@
 inline LockWord::LockWord(Monitor* mon, uint32_t rb_state)
     : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) |
              (kStateFat << kStateShift)) {
+  DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
 #ifndef __LP64__
   DCHECK_ALIGNED(mon, kMonitorIdAlignment);
 #endif
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 46c3bd4..655aa3a 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -94,6 +94,7 @@
     kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted,
 
     // When the state is kHashCode, the non-state bits hold the hashcode.
+    // Note Object.hashCode() has the hash code layout hardcoded.
     kHashShift = 0,
     kHashSize = 32 - kStateSize - kReadBarrierStateSize,
     kHashMask = (1 << kHashSize) - 1,
@@ -110,6 +111,7 @@
   static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) {
     CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner));
     CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount));
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) |
                     (rb_state << kReadBarrierStateShift) |
                     (kStateThinOrUnlocked << kStateShift));
@@ -122,12 +124,14 @@
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash));
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord((hash_code << kHashShift) |
                     (rb_state << kReadBarrierStateShift) |
                     (kStateHash << kStateShift));
   }
 
   static LockWord FromDefault(uint32_t rb_state) {
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
     return LockWord(rb_state << kReadBarrierStateShift);
   }
 
@@ -149,7 +153,8 @@
 
   LockState GetState() const {
     CheckReadBarrierState();
-    if (UNLIKELY(value_ == 0)) {
+    if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) ||
+        (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) {
       return kUnlocked;
     } else {
       uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
@@ -171,6 +176,14 @@
     return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask;
   }
 
+  void SetReadBarrierState(uint32_t rb_state) {
+    DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
+    // Clear and or the bits.
+    value_ &= ~(kReadBarrierStateMask << kReadBarrierStateShift);
+    value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift;
+  }
+
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 712286f..cc6f5c4 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -548,6 +548,10 @@
       << PrettyClass(this)
       << "A class object shouldn't be allocated through this "
       << "as it requires a pre-fence visitor that sets the class size.";
+  DCHECK(!IsStringClass())
+      << PrettyClass(this)
+      << "A string shouldn't be allocated through this "
+      << "as it requires a pre-fence visitor that sets the class size.";
   DCHECK(IsInstantiable()) << PrettyClass(this);
   // TODO: decide whether we want this check. It currently fails during bootstrap.
   // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 1739019..56c586a 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -330,10 +330,6 @@
   return IsInSamePackage(klass1->GetDescriptor(&temp1), klass2->GetDescriptor(&temp2));
 }
 
-bool Class::IsStringClass() const {
-  return this == String::GetJavaLangString();
-}
-
 bool Class::IsThrowableClass() {
   return WellKnownClasses::ToClass(WellKnownClasses::java_lang_Throwable)->IsAssignableFrom(this);
 }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 18496fd..d3cfd01 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -235,6 +235,15 @@
     SetAccessFlags(flags | kAccClassIsFinalizable);
   }
 
+  ALWAYS_INLINE bool IsStringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetField32(AccessFlagsOffset()) & kAccClassIsStringClass) != 0;
+  }
+
+  ALWAYS_INLINE void SetStringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_));
+    SetAccessFlags(flags | kAccClassIsStringClass);
+  }
+
   // Returns true if the class is abstract.
   ALWAYS_INLINE bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
@@ -416,8 +425,6 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsStringClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -484,10 +491,10 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsVariableSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Classes and arrays vary in size, and so the object_size_ field cannot
+    // Classes, arrays, and strings vary in size, and so the object_size_ field cannot
     // be used to Get their instance size
     return IsClassClass<kVerifyFlags, kReadBarrierOption>() ||
-        IsArrayClass<kVerifyFlags, kReadBarrierOption>();
+        IsArrayClass<kVerifyFlags, kReadBarrierOption>() || IsStringClass();
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
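Since IsStringClass() is now a flag test on access_flags_ (set via SetStringClass during bootstrap and class loading) rather than a pointer comparison against the cached java.lang.String class, it stays cheap in hot paths, and strings join classes and arrays as variable-size objects: their size derives from the character count, not Class::object_size_. A rough sketch of that size computation under stated assumptions:

#include <cstddef>
#include <cstdint>

constexpr size_t kObjectHeaderSize = 8;   // assumption: 8-byte object header
constexpr size_t kStringFixedBytes = 8;   // count_ + hash_code_
constexpr size_t kObjectAlignment  = 8;

size_t StringAllocSize(int32_t char_count) {
  size_t size = kObjectHeaderSize + kStringFixedBytes +
                static_cast<size_t>(char_count) * sizeof(uint16_t);
  return (size + kObjectAlignment - 1) & ~(kObjectAlignment - 1);  // round up to alignment
}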
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 2581fad..39d0f56 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -28,8 +28,9 @@
 #include "monitor.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
-#include "runtime.h"
 #include "reference.h"
+#include "runtime.h"
+#include "string-inl.h"
 #include "throwable.h"
 
 namespace art {
@@ -115,8 +116,11 @@
 }
 
 inline Object* Object::GetReadBarrierPointer() {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  return reinterpret_cast<Object*>(GetLockWord(false).ReadBarrierState());
+#elif USE_BROOKS_READ_BARRIER
+  DCHECK(kUseBrooksReadBarrier);
   return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
       OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
 #else
@@ -126,8 +130,14 @@
 }
 
 inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
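+  // The read barrier state lives in the 32-bit lock word, so rb_ptr must fit in 32 bits.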
+  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  LockWord lw = GetLockWord(false);
+  lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  SetLockWord(lw, false);
+#elif USE_BROOKS_READ_BARRIER
+  DCHECK(kUseBrooksReadBarrier);
   // We don't mark the card as this occurs as part of object allocation. Not all objects have
   // backing cards, such as large objects.
   SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
@@ -140,8 +150,27 @@
 }
 
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
-  DCHECK(kUseBakerOrBrooksReadBarrier);
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
+  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
+  LockWord expected_lw;
+  LockWord new_lw;
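+  // Retry the weak CAS until it succeeds or the current state no longer matches expected_rb_ptr.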
+  do {
+    LockWord lw = GetLockWord(false);
+    if (UNLIKELY(reinterpret_cast<Object*>(lw.ReadBarrierState()) != expected_rb_ptr)) {
+      // Lost the race.
+      return false;
+    }
+    expected_lw = lw;
+    expected_lw.SetReadBarrierState(
+        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
+    new_lw = lw;
+    new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  } while (!CasLockWordWeakSequentiallyConsistent(expected_lw, new_lw));
+  return true;
+#elif USE_BROOKS_READ_BARRIER
+  DCHECK(kUseBrooksReadBarrier);
   MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + offset.SizeValue();
   Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
@@ -337,9 +366,14 @@
   return down_cast<DoubleArray*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Object::IsString() {
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsStringClass();
+}
+
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline String* Object::AsString() {
-  DCHECK(GetClass<kVerifyFlags>()->IsStringClass());
+  DCHECK((IsString<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<String*>(this);
 }
 
@@ -385,6 +419,9 @@
   } else if (IsClass<kNewFlags, kReadBarrierOption>()) {
     result = AsClass<kNewFlags, kReadBarrierOption>()->
         template SizeOf<kNewFlags, kReadBarrierOption>();
+  } else if (GetClass<kNewFlags, kReadBarrierOption>()->IsStringClass()) {
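+    // Strings are variable-sized, so ask the string itself rather than the class.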
+    result = AsString<kNewFlags, kReadBarrierOption>()->
+        template SizeOf<kNewFlags>();
   } else {
     result = GetClass<kNewFlags, kReadBarrierOption>()->
         template GetObjectSize<kNewFlags, kReadBarrierOption>();
@@ -947,7 +984,7 @@
   mirror::Class* klass = GetClass<kVerifyFlags>();
   if (klass == Class::GetJavaLangClass()) {
     AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
-  } else if (klass->IsArrayClass()) {
+  } else if (klass->IsArrayClass() || klass->IsStringClass()) {
     if (klass->IsObjectArrayClass<kVerifyNone>()) {
       AsObjectArray<mirror::Object, kVerifyNone>()->VisitReferences<kVisitClass>(visitor);
     } else if (kVisitClass) {
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 343c9bc..f2d879f 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -62,7 +62,7 @@
 static constexpr bool kCheckFieldAssignments = false;
 
 // Size of Object.
-static constexpr uint32_t kObjectHeaderSize = kUseBakerOrBrooksReadBarrier ? 16 : 8;
+static constexpr uint32_t kObjectHeaderSize = kUseBrooksReadBarrier ? 16 : 8;
 
 // C++ mirror of java.lang.Object
 class MANAGED LOCKABLE Object {
@@ -94,6 +94,9 @@
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
+  NO_RETURN
+#endif
   bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -182,7 +185,12 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   DoubleArray* AsDoubleArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool IsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   String* AsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -502,11 +510,11 @@
   // Monitor and hash code information.
   uint32_t monitor_;
 
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+#ifdef USE_BROOKS_READ_BARRIER
   // Note names use a 'x' prefix and the x_rb_ptr_ is of type int
   // instead of Object to go with the alphabetical/by-type field order
   // on the Java side.
-  uint32_t x_rb_ptr_;      // For the Baker or Brooks pointer.
+  uint32_t x_rb_ptr_;      // For the Brooks pointer.
   uint32_t x_xpadding_;    // For 8-byte alignment. TODO: get rid of this.
 #endif
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 2262af5..8e50a7a 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -61,14 +61,13 @@
     Handle<String> string(
         hs.NewHandle(String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in)));
     ASSERT_EQ(expected_utf16_length, string->GetLength());
-    ASSERT_TRUE(string->GetCharArray() != nullptr);
-    ASSERT_TRUE(string->GetCharArray()->GetData() != nullptr);
+    ASSERT_TRUE(string->GetValue() != nullptr);
     // strlen is necessary because the 1-character string "\x00\x00" is interpreted as ""
     ASSERT_TRUE(string->Equals(utf8_in) || (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     ASSERT_TRUE(string->Equals(StringPiece(utf8_in)) ||
                 (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     for (int32_t i = 0; i < expected_utf16_length; i++) {
-      EXPECT_EQ(utf16_expected[i], string->UncheckedCharAt(i));
+      EXPECT_EQ(utf16_expected[i], string->CharAt(i));
     }
     EXPECT_EQ(expected_hash, string->GetHashCode());
   }
@@ -491,12 +490,6 @@
   Handle<String> string(hs.NewHandle(String::AllocFromModifiedUtf8(soa.Self(), "android")));
   EXPECT_EQ(string->GetLength(), 7);
   EXPECT_EQ(string->GetUtfLength(), 7);
-
-  string->SetOffset(2);
-  string->SetCount(5);
-  EXPECT_TRUE(string->Equals("droid"));
-  EXPECT_EQ(string->GetLength(), 5);
-  EXPECT_EQ(string->GetUtfLength(), 5);
 }
 
 TEST_F(ObjectTest, DescriptorCompare) {
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index b367cff..cd5d2f6 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -19,6 +19,7 @@
 
 #include "array.h"
 #include "class.h"
+#include "gc/heap-inl.h"
 #include "intern_table.h"
 #include "runtime.h"
 #include "string.h"
@@ -29,41 +30,173 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize() {
-  uint32_t vtable_entries = Object::kVTableLength + 51;
+  uint32_t vtable_entries = Object::kVTableLength + 52;
   return Class::ComputeClassSize(true, vtable_entries, 0, 1, 0, 1, 2);
 }
 
-inline uint16_t String::UncheckedCharAt(int32_t index) {
-  return GetCharArray()->Get(index + GetOffset());
-}
+// Sets string count in the allocation code path to ensure it is guarded by a CAS.
+class SetStringCountVisitor {
+ public:
+  explicit SetStringCountVisitor(int32_t count) : count_(count) {
+  }
 
-inline CharArray* String::GetCharArray() {
-  return GetFieldObject<CharArray>(ValueOffset());
-}
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsString as object is not yet in live bitmap or allocation stack.
+    String* string = down_cast<String*>(obj);
+    string->SetCount(count_);
+  }
 
-inline int32_t String::GetLength() {
-  int32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
-  DCHECK(result >= 0 && result <= GetCharArray()->GetLength());
-  return result;
-}
+ private:
+  const int32_t count_;
+};
 
-inline void String::SetArray(CharArray* new_array) {
-  // Array is invariant so use non-transactional mode. Also disable check as we may run inside
-  // a transaction.
-  DCHECK(new_array != nullptr);
-  SetFieldObject<false, false>(OFFSET_OF_OBJECT_MEMBER(String, array_), new_array);
-}
+// Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
+class SetStringCountAndBytesVisitor {
+ public:
+  SetStringCountAndBytesVisitor(int32_t count, Handle<ByteArray> src_array, int32_t offset,
+                                int32_t high_byte)
+      : count_(count), src_array_(src_array), offset_(offset), high_byte_(high_byte) {
+  }
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsString as object is not yet in live bitmap or allocation stack.
+    String* string = down_cast<String*>(obj);
+    string->SetCount(count_);
+    uint16_t* value = string->GetValue();
+    const uint8_t* const src = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_;
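+    // high_byte_ is already shifted into the upper byte by the caller (high_byte << 8).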
+    for (int i = 0; i < count_; i++) {
+      value[i] = high_byte_ + (src[i] & 0xFF);
+    }
+  }
+
+ private:
+  const int32_t count_;
+  Handle<ByteArray> src_array_;
+  const int32_t offset_;
+  const int32_t high_byte_;
+};
+
+// Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
+class SetStringCountAndValueVisitorFromCharArray {
+ public:
+  SetStringCountAndValueVisitorFromCharArray(int32_t count, Handle<CharArray> src_array,
+                                             int32_t offset) :
+    count_(count), src_array_(src_array), offset_(offset) {
+  }
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsString as object is not yet in live bitmap or allocation stack.
+    String* string = down_cast<String*>(obj);
+    string->SetCount(count_);
+    const uint16_t* const src = src_array_->GetData() + offset_;
+    memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+  }
+
+ private:
+  const int32_t count_;
+  Handle<CharArray> src_array_;
+  const int32_t offset_;
+};
+
+// Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
+class SetStringCountAndValueVisitorFromString {
+ public:
+  SetStringCountAndValueVisitorFromString(int32_t count, Handle<String> src_string,
+                                          int32_t offset) :
+    count_(count), src_string_(src_string), offset_(offset) {
+  }
+
+  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsString as object is not yet in live bitmap or allocation stack.
+    String* string = down_cast<String*>(obj);
+    string->SetCount(count_);
+    const uint16_t* const src = src_string_->GetValue() + offset_;
+    memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+  }
+
+ private:
+  const int32_t count_;
+  Handle<String> src_string_;
+  const int32_t offset_;
+};
 
 inline String* String::Intern() {
   return Runtime::Current()->GetInternTable()->InternWeak(this);
 }
 
+inline uint16_t String::CharAt(int32_t index) {
+  int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
+  if (UNLIKELY((index < 0) || (index >= count))) {
+    Thread* self = Thread::Current();
+    self->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
+                             "length=%i; index=%i", count, index);
+    return 0;
+  }
+  return GetValue()[index];
+}
+
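+// Strings are variable-sized: the header plus one uint16_t per character.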
+template<VerifyObjectFlags kVerifyFlags>
+inline size_t String::SizeOf() {
+  return sizeof(String) + (sizeof(uint16_t) * GetLength<kVerifyFlags>());
+}
+
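+// Allocates a String with space for its character data inline, as one variable-sized object.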
+template <bool kIsInstrumented, typename PreFenceVisitor>
+inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type,
+                             const PreFenceVisitor& pre_fence_visitor) {
+  size_t header_size = sizeof(String);
+  size_t data_size = sizeof(uint16_t) * utf16_length;
+  size_t size = header_size + data_size;
+  Class* string_class = GetJavaLangString();
+
+  // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
+  if (UNLIKELY(size < data_size)) {
+    self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
+                                             PrettyDescriptor(string_class).c_str(),
+                                             utf16_length).c_str());
+    return nullptr;
+  }
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  return down_cast<String*>(
+      heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, string_class, size,
+                                                             allocator_type, pre_fence_visitor));
+}
+
+template <bool kIsInstrumented>
+inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length,
+                                          Handle<ByteArray> array, int32_t offset,
+                                          int32_t high_byte, gc::AllocatorType allocator_type) {
+  SetStringCountAndBytesVisitor visitor(byte_length, array, offset, high_byte << 8);
+  String* string = Alloc<kIsInstrumented>(self, byte_length, allocator_type, visitor);
+  return string;
+}
+
+template <bool kIsInstrumented>
+inline String* String::AllocFromCharArray(Thread* self, int32_t array_length,
+                                          Handle<CharArray> array, int32_t offset,
+                                          gc::AllocatorType allocator_type) {
+  SetStringCountAndValueVisitorFromCharArray visitor(array_length, array, offset);
+  String* new_string = Alloc<kIsInstrumented>(self, array_length, allocator_type, visitor);
+  return new_string;
+}
+
+template <bool kIsInstrumented>
+inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string,
+                                       int32_t offset, gc::AllocatorType allocator_type) {
+  SetStringCountAndValueVisitorFromString visitor(string_length, string, offset);
+  String* new_string = Alloc<kIsInstrumented>(self, string_length, allocator_type, visitor);
+  return new_string;
+}
+
 inline int32_t String::GetHashCode() {
   int32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_));
   if (UNLIKELY(result == 0)) {
     result = ComputeHashCode();
   }
-  DCHECK(result != 0 || ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()) == 0)
+  DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0)
       << ToModifiedUtf8() << " " << result;
   return result;
 }
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index b7fd240..b6236b1 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -20,10 +20,11 @@
 #include "array.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "handle_scope-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
 #include "runtime.h"
-#include "handle_scope-inl.h"
+#include "string-inl.h"
 #include "thread.h"
 #include "utf-inl.h"
 
@@ -40,7 +41,7 @@
   } else if (start > count) {
     start = count;
   }
-  const uint16_t* chars = GetCharArray()->GetData() + GetOffset();
+  const uint16_t* chars = GetValue();
   const uint16_t* p = chars + start;
   const uint16_t* end = chars + count;
   while (p < end) {
@@ -62,36 +63,46 @@
   java_lang_String_ = GcRoot<Class>(nullptr);
 }
 
-int32_t String::ComputeHashCode() {
-  const int32_t hash_code = ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength());
+int32_t String::ComputeHashCode() {
+  const int32_t hash_code = ComputeUtf16Hash(GetValue(), GetLength());
   SetHashCode(hash_code);
   return hash_code;
 }
 
 int32_t String::GetUtfLength() {
-  return CountUtf8Bytes(GetCharArray()->GetData() + GetOffset(), GetLength());
+  return CountUtf8Bytes(GetValue(), GetLength());
 }
 
-String* String::AllocFromUtf16(Thread* self,
-                               int32_t utf16_length,
-                               const uint16_t* utf16_data_in,
-                               int32_t hash_code) {
+void String::SetCharAt(int32_t index, uint16_t c) {
+  DCHECK((index >= 0) && (index < count_));
+  GetValue()[index] = c;
+}
+
+String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) {
+  int32_t length = string->GetLength();
+  int32_t length2 = string2->GetLength();
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  SetStringCountVisitor visitor(length + length2);
+  String* new_string = Alloc<true>(self, length + length2, allocator_type, visitor);
+  if (UNLIKELY(new_string == nullptr)) {
+    return nullptr;
+  }
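+  // Copy the characters of both source strings into the new string's inline payload.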
+  uint16_t* new_value = new_string->GetValue();
+  memcpy(new_value, string->GetValue(), length * sizeof(uint16_t));
+  memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t));
+  return new_string;
+}
+
+String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) {
   CHECK(utf16_data_in != nullptr || utf16_length == 0);
-  String* string = Alloc(self, utf16_length);
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  SetStringCountVisitor visitor(utf16_length);
+  String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
-  CharArray* array = const_cast<CharArray*>(string->GetCharArray());
-  if (UNLIKELY(array == nullptr)) {
-    return nullptr;
-  }
-  memcpy(array->GetData(), utf16_data_in, utf16_length * sizeof(uint16_t));
-  if (hash_code != 0) {
-    DCHECK_EQ(hash_code, ComputeUtf16Hash(utf16_data_in, utf16_length));
-    string->SetHashCode(hash_code);
-  } else {
-    string->ComputeHashCode();
-  }
+  uint16_t* array = string->GetValue();
+  memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t));
   return string;
 }
 
@@ -103,33 +114,14 @@
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
                                       const char* utf8_data_in) {
-  String* string = Alloc(self, utf16_length);
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  SetStringCountVisitor visitor(utf16_length);
+  String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
-  uint16_t* utf16_data_out =
-      const_cast<uint16_t*>(string->GetCharArray()->GetData());
+  uint16_t* utf16_data_out = string->GetValue();
   ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
-  string->ComputeHashCode();
-  return string;
-}
-
-String* String::Alloc(Thread* self, int32_t utf16_length) {
-  StackHandleScope<1> hs(self);
-  Handle<CharArray> array(hs.NewHandle(CharArray::Alloc(self, utf16_length)));
-  if (UNLIKELY(array.Get() == nullptr)) {
-    return nullptr;
-  }
-  return Alloc(self, array);
-}
-
-String* String::Alloc(Thread* self, Handle<CharArray> array) {
-  // Hold reference in case AllocObject causes GC.
-  String* string = down_cast<String*>(GetJavaLangString()->AllocObject(self));
-  if (LIKELY(string != nullptr)) {
-    string->SetArray(array.Get());
-    string->SetCount(array->GetLength());
-  }
   return string;
 }
 
@@ -147,7 +139,7 @@
     // Note: don't short circuit on hash code as we're presumably here as the
     // hash code was already equal
     for (int32_t i = 0; i < that->GetLength(); ++i) {
-      if (this->UncheckedCharAt(i) != that->UncheckedCharAt(i)) {
+      if (this->CharAt(i) != that->CharAt(i)) {
         return false;
       }
     }
@@ -160,7 +152,7 @@
     return false;
   } else {
     for (int32_t i = 0; i < that_length; ++i) {
-      if (this->UncheckedCharAt(i) != that_chars[that_offset + i]) {
+      if (this->CharAt(i) != that_chars[that_offset + i]) {
         return false;
       }
     }
@@ -177,7 +169,7 @@
       return false;
     }
 
-    if (GetLeadingUtf16Char(ch) != UncheckedCharAt(i++)) {
+    if (GetLeadingUtf16Char(ch) != CharAt(i++)) {
       return false;
     }
 
@@ -187,7 +179,7 @@
         return false;
       }
 
-      if (UncheckedCharAt(i++) != trailing) {
+      if (CharAt(i++) != trailing) {
         return false;
       }
     }
@@ -201,7 +193,7 @@
   for (int32_t i = 0; i < length; ++i) {
     uint32_t ch = GetUtf16FromUtf8(&p);
 
-    if (GetLeadingUtf16Char(ch) != UncheckedCharAt(i)) {
+    if (GetLeadingUtf16Char(ch) != CharAt(i)) {
       return false;
     }
 
@@ -211,7 +203,7 @@
         return false;
       }
 
-      if (UncheckedCharAt(++i) != trailing) {
+      if (CharAt(++i) != trailing) {
         return false;
       }
     }
@@ -221,7 +213,7 @@
 
 // Create a modified UTF-8 encoded std::string from a java/lang/String object.
 std::string String::ToModifiedUtf8() {
-  const uint16_t* chars = GetCharArray()->GetData() + GetOffset();
+  const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
   ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
@@ -244,8 +236,8 @@
   int32_t rhsCount = rhs->GetLength();
   int32_t countDiff = lhsCount - rhsCount;
   int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
-  const uint16_t* lhsChars = lhs->GetCharArray()->GetData() + lhs->GetOffset();
-  const uint16_t* rhsChars = rhs->GetCharArray()->GetData() + rhs->GetOffset();
+  const uint16_t* lhsChars = lhs->GetValue();
+  const uint16_t* rhsChars = rhs->GetValue();
   int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
   if (otherRes != 0) {
     return otherRes;
@@ -257,5 +249,19 @@
   java_lang_String_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+CharArray* String::ToCharArray(Thread* self) {
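+  // Keep this string in a handle: the CharArray allocation below may cause GC and move it.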
+  StackHandleScope<1> hs(self);
+  Handle<String> string(hs.NewHandle(this));
+  CharArray* result = CharArray::Alloc(self, GetLength());
+  memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
+  return result;
+}
+
+void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) {
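+  // No bounds checks here; this backs getCharsNoCheck, whose callers validate the range.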
+  uint16_t* data = array->GetData() + index;
+  uint16_t* value = GetValue() + start;
+  memcpy(data, value, (end - start) * sizeof(uint16_t));
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 0670d0b..fcfe976 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_STRING_H_
 
 #include "gc_root.h"
+#include "gc/allocator_type.h"
 #include "object.h"
 #include "object_callbacks.h"
 
@@ -45,22 +46,27 @@
   }
 
   static MemberOffset ValueOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(String, array_);
+    return OFFSET_OF_OBJECT_MEMBER(String, value_);
   }
 
-  static MemberOffset OffsetOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(String, offset_);
+  uint16_t* GetValue() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return &value_[0];
   }
 
-  CharArray* GetCharArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    int32_t result = GetField32(OffsetOffset());
-    DCHECK_LE(0, result);
-    return result;
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(String, count_));
   }
 
-  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetCount(int32_t new_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Count is invariant so use non-transactional mode. Also disable check as we may run inside
+    // a transaction.
+    DCHECK_LE(0, new_count);
+    SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
+  }
 
   int32_t GetHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -69,19 +75,47 @@
 
   int32_t GetUtfLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  uint16_t CharAt(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetCharAt(int32_t index, uint16_t c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   String* Intern() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static String* AllocFromUtf16(Thread* self,
-                                int32_t utf16_length,
-                                const uint16_t* utf16_data_in,
-                                int32_t hash_code = 0)
+  template <bool kIsInstrumented, typename PreFenceVisitor>
+  ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length,
+                                     gc::AllocatorType allocator_type,
+                                     const PreFenceVisitor& pre_fence_visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length,
+                                                  Handle<ByteArray> array, int32_t offset,
+                                                  int32_t high_byte,
+                                                  gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocFromCharArray(Thread* self, int32_t array_length,
+                                                  Handle<CharArray> array, int32_t offset,
+                                                  gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE static String* AllocFromString(Thread* self, int32_t string_length,
+                                               Handle<String> string, int32_t offset,
+                                               gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static String* AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static String* AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                       const char* utf8_data_in)
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // TODO: This is only used in the interpreter to compare against
@@ -112,13 +146,10 @@
 
   int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Offset is only used during testing so use non-transactional mode.
-    DCHECK_LE(0, new_offset);
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
-  }
+  CharArray* ToCharArray(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_String_.IsNull());
@@ -130,9 +161,6 @@
   static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // TODO: Make this private. It's only used on ObjectTest at the moment.
-  uint16_t UncheckedCharAt(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
  private:
   void SetHashCode(int32_t new_hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Hash code is invariant so use non-transactional mode. Also disable check as we may run inside
@@ -141,27 +169,12 @@
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code);
   }
 
-  void SetCount(int32_t new_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Count is invariant so use non-transactional mode. Also disable check as we may run inside
-    // a transaction.
-    DCHECK_LE(0, new_count);
-    SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
-  }
-
-  static String* Alloc(Thread* self, int32_t utf16_length)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  static String* Alloc(Thread* self, Handle<CharArray> array)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  HeapReference<CharArray> array_;
-
   int32_t count_;
 
   uint32_t hash_code_;
 
-  int32_t offset_;
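+  // The characters are stored inline at the end of the object as a zero-length array.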
+  uint16_t value_[0];
 
   static GcRoot<Class> java_lang_String_;
 
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index e7bd207..8586dd1 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -65,6 +65,8 @@
 static constexpr uint32_t kAccClassIsFinalizerReference = 0x02000000;
 // class is a phantom reference
 static constexpr uint32_t kAccClassIsPhantomReference   = 0x01000000;
+// class is the string class
+static constexpr uint32_t kAccClassIsStringClass        = 0x00800000;
 
 static constexpr uint32_t kAccReferenceFlagsMask = (kAccClassIsReference
                                                   | kAccClassIsWeakReference
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 95e4460..b7245c1 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -141,6 +141,10 @@
     CHECK_EQ(error, 0) << strerror(error);
     return result;
   }
+
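+  // Pair the custom operator new above with a delete that releases the memory with free().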
+  void operator delete(void* ptr) {
+    free(ptr);
+  }
 #endif
 
  private:
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 48a8bc7..a779e97 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -192,7 +192,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   size_t low = 0;
   size_t high = num_fields;
-  const uint16_t* const data = name->GetCharArray()->GetData() + name->GetOffset();
+  const uint16_t* const data = name->GetValue();
   const size_t length = name->GetLength();
   while (low < high) {
     auto mid = (low + high) / 2;
@@ -487,6 +487,17 @@
                                    PrettyClass(klass.Get()).c_str());
     return nullptr;
   }
+  // Invoke the string allocator to return an empty string for the string class.
+  if (klass->IsStringClass()) {
+    gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+    mirror::SetStringCountVisitor visitor(0);
+    mirror::Object* obj = mirror::String::Alloc<true>(soa.Self(), 0, allocator_type, visitor);
+    if (UNLIKELY(soa.Self()->IsExceptionPending())) {
+      return nullptr;
+    } else {
+      return soa.AddLocalReference<jobject>(obj);
+    }
+  }
   auto receiver = hs.NewHandle(klass->AllocObject(soa.Self()));
   if (UNLIKELY(receiver.Get() == nullptr)) {
     soa.Self()->AssertPendingOOMException();
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index 2d153d4..aa64b79 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -18,6 +18,9 @@
 
 #include "common_throws.h"
 #include "jni_internal.h"
+#include "mirror/array.h"
+#include "mirror/object-inl.h"
+#include "mirror/string.h"
 #include "mirror/string-inl.h"
 #include "scoped_fast_native_object_access.h"
 #include "scoped_thread_state_change.h"
@@ -26,36 +29,93 @@
 
 namespace art {
 
-static jint String_compareTo(JNIEnv* env, jobject javaThis, jobject javaRhs) {
+static jchar String_charAt(JNIEnv* env, jobject java_this, jint index) {
   ScopedFastNativeObjectAccess soa(env);
-  if (UNLIKELY(javaRhs == nullptr)) {
+  return soa.Decode<mirror::String*>(java_this)->CharAt(index);
+}
+
+static jint String_compareTo(JNIEnv* env, jobject java_this, jobject java_rhs) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(java_rhs == nullptr)) {
     ThrowNullPointerException("rhs == null");
     return -1;
   } else {
-    return soa.Decode<mirror::String*>(javaThis)->CompareTo(soa.Decode<mirror::String*>(javaRhs));
+    return soa.Decode<mirror::String*>(java_this)->CompareTo(soa.Decode<mirror::String*>(java_rhs));
   }
 }
 
+static jstring String_concat(JNIEnv* env, jobject java_this, jobject java_string_arg) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(java_string_arg == nullptr)) {
+    ThrowNullPointerException("string arg == null");
+    return nullptr;
+  }
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::String> string_this(hs.NewHandle(soa.Decode<mirror::String*>(java_this)));
+  Handle<mirror::String> string_arg(hs.NewHandle(soa.Decode<mirror::String*>(java_string_arg)));
+  int32_t length_this = string_this->GetLength();
+  int32_t length_arg = string_arg->GetLength();
+  if (length_arg > 0 && length_this > 0) {
+    mirror::String* result = mirror::String::AllocFromStrings(soa.Self(), string_this, string_arg);
+    return soa.AddLocalReference<jstring>(result);
+  }
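+  // At least one operand is empty, so return an existing string instead of allocating.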
+  jobject string_original = (length_this == 0) ? java_string_arg : java_this;
+  return reinterpret_cast<jstring>(string_original);
+}
+
 static jint String_fastIndexOf(JNIEnv* env, jobject java_this, jint ch, jint start) {
   ScopedFastNativeObjectAccess soa(env);
   // This method does not handle supplementary characters. They're dealt with in managed code.
   DCHECK_LE(ch, 0xffff);
-
-  mirror::String* s = soa.Decode<mirror::String*>(java_this);
-  return s->FastIndexOf(ch, start);
+  return soa.Decode<mirror::String*>(java_this)->FastIndexOf(ch, start);
 }
 
-static jstring String_intern(JNIEnv* env, jobject javaThis) {
+static jstring String_fastSubstring(JNIEnv* env, jobject java_this, jint start, jint length) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::String* s = soa.Decode<mirror::String*>(javaThis);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string_this(hs.NewHandle(soa.Decode<mirror::String*>(java_this)));
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  mirror::String* result = mirror::String::AllocFromString<true>(soa.Self(), length, string_this,
+                                                                 start, allocator_type);
+  return soa.AddLocalReference<jstring>(result);
+}
+
+static void String_getCharsNoCheck(JNIEnv* env, jobject java_this, jint start, jint end,
+                                   jcharArray buffer, jint index) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray*>(buffer)));
+  soa.Decode<mirror::String*>(java_this)->GetChars(start, end, char_array, index);
+}
+
+static jstring String_intern(JNIEnv* env, jobject java_this) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::String* s = soa.Decode<mirror::String*>(java_this);
   mirror::String* result = s->Intern();
   return soa.AddLocalReference<jstring>(result);
 }
 
+static void String_setCharAt(JNIEnv* env, jobject java_this, jint index, jchar c) {
+  ScopedFastNativeObjectAccess soa(env);
+  soa.Decode<mirror::String*>(java_this)->SetCharAt(index, c);
+}
+
+static jcharArray String_toCharArray(JNIEnv* env, jobject java_this) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::String* s = soa.Decode<mirror::String*>(java_this);
+  return soa.AddLocalReference<jcharArray>(s->ToCharArray(soa.Self()));
+}
+
 static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(String, charAt, "!(I)C"),
   NATIVE_METHOD(String, compareTo, "!(Ljava/lang/String;)I"),
+  NATIVE_METHOD(String, concat, "!(Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(String, fastIndexOf, "!(II)I"),
+  NATIVE_METHOD(String, fastSubstring, "!(II)Ljava/lang/String;"),
+  NATIVE_METHOD(String, getCharsNoCheck, "!(II[CI)V"),
   NATIVE_METHOD(String, intern, "!()Ljava/lang/String;"),
+  NATIVE_METHOD(String, setCharAt, "!(IC)V"),
+  NATIVE_METHOD(String, toCharArray, "!()[C"),
 };
 
 void register_java_lang_String(JNIEnv* env) {
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
new file mode 100644
index 0000000..34d6a37
--- /dev/null
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_StringFactory.h"
+
+#include "common_throws.h"
+#include "jni_internal.h"
+#include "mirror/object-inl.h"
+#include "mirror/string.h"
+#include "scoped_fast_native_object_access.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedLocalRef.h"
+#include "ScopedPrimitiveArray.h"
+
+namespace art {
+
+static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
+                                                jint high, jint offset, jint byte_count) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(java_data == nullptr)) {
+    ThrowNullPointerException("data == null");
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray*>(java_data)));
+  int32_t data_size = byte_array->GetLength();
+  if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
+                                   "length=%d; regionStart=%d; regionLength=%d", data_size,
+                                   offset, byte_count);
+    return nullptr;
+  }
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  mirror::String* result = mirror::String::AllocFromByteArray<true>(soa.Self(), byte_count,
+                                                                    byte_array, offset, high,
+                                                                    allocator_type);
+  return soa.AddLocalReference<jstring>(result);
+}
+
+static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
+                                                jint char_count, jcharArray java_data) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray*>(java_data)));
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  mirror::String* result = mirror::String::AllocFromCharArray<true>(soa.Self(), char_count,
+                                                                    char_array, offset,
+                                                                    allocator_type);
+  return soa.AddLocalReference<jstring>(result);
+}
+
+static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
+  ScopedFastNativeObjectAccess soa(env);
+  if (UNLIKELY(to_copy == nullptr)) {
+    ThrowNullPointerException("toCopy == null");
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(to_copy)));
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  mirror::String* result = mirror::String::AllocFromString<true>(soa.Self(), string->GetLength(),
+                                                                 string, 0, allocator_type);
+  return soa.AddLocalReference<jstring>(result);
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(StringFactory, newStringFromBytes, "!([BIII)Ljava/lang/String;"),
+  NATIVE_METHOD(StringFactory, newStringFromChars, "!(II[C)Ljava/lang/String;"),
+  NATIVE_METHOD(StringFactory, newStringFromString, "!(Ljava/lang/String;)Ljava/lang/String;"),
+};
+
+void register_java_lang_StringFactory(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/StringFactory");
+}
+
+}  // namespace art
diff --git a/runtime/native/java_lang_StringFactory.h b/runtime/native/java_lang_StringFactory.h
new file mode 100644
index 0000000..c476ad3
--- /dev/null
+++ b/runtime/native/java_lang_StringFactory.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_STRINGFACTORY_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_STRINGFACTORY_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_java_lang_StringFactory(JNIEnv* env);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_STRINGFACTORY_H_
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index f74102e..40d6584 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -72,6 +72,12 @@
   if (!kMovingClasses && c->IsClassClass()) {
     movable = false;
   }
+
+  // String constructor is replaced by a StringFactory method in InvokeMethod.
+  if (c->IsStringClass()) {
+    return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 1);
+  }
+
   mirror::Object* receiver =
       movable ? c->AllocObject(soa.Self()) : c->AllocNonMovableObject(soa.Self());
   if (receiver == nullptr) {
diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc
new file mode 100644
index 0000000..1216824
--- /dev/null
+++ b/runtime/native/libcore_util_CharsetUtils.cc
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_internal.h"
+#include "mirror/string.h"
+#include "mirror/string-inl.h"
+#include "native/libcore_util_CharsetUtils.h"
+#include "scoped_fast_native_object_access.h"
+#include "ScopedPrimitiveArray.h"
+#include "unicode/utf16.h"
+
+#include <string.h>
+
+namespace art {
+
+/**
+ * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
+ * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
+ * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
+ *
+ * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
+ * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
+ * to the garbage collector (nor hide potentially large allocations from it).
+ *
+ * Because a call to append might require an allocation, it might fail. Callers should always
+ * check the return value of append.
+ */
+class NativeUnsafeByteSequence {
+ public:
+  explicit NativeUnsafeByteSequence(JNIEnv* env)
+    : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) {
+  }
+
+  ~NativeUnsafeByteSequence() {
+    // Release our pointer to the raw array, copying changes back to the Java heap.
+    if (mRawArray != nullptr) {
+      mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
+    }
+  }
+
+  bool append(jbyte b) {
+    if (mOffset == mSize && !resize(mSize * 2)) {
+      return false;
+    }
+    mRawArray[mOffset++] = b;
+    return true;
+  }
+
+  bool resize(int newSize) {
+    if (newSize == mSize) {
+      return true;
+    }
+
+    // Allocate a new array.
+    jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
+    if (newJavaArray == nullptr) {
+      return false;
+    }
+    jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr);
+    if (newRawArray == nullptr) {
+      return false;
+    }
+
+    // Copy data out of the old array and then let go of it.
+    // Note that we may be trimming the array.
+    if (mRawArray != nullptr) {
+      memcpy(newRawArray, mRawArray, mOffset);
+      mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
+      mEnv->DeleteLocalRef(mJavaArray);
+    }
+
+    // Point ourselves at the new array.
+    mJavaArray = newJavaArray;
+    mRawArray = newRawArray;
+    mSize = newSize;
+    return true;
+  }
+
+  jbyteArray toByteArray() {
+    // Trim any unused space, if necessary.
+    bool okay = resize(mOffset);
+    return okay ? mJavaArray : nullptr;
+  }
+
+ private:
+  JNIEnv* mEnv;
+  jbyteArray mJavaArray;
+  jbyte* mRawArray;
+  jint mSize;
+  jint mOffset;
+
+  // Disallow copy and assignment.
+  NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
+  void operator=(const NativeUnsafeByteSequence&);
+};
+
+static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset,
+                                           jint length, jcharArray javaChars) {
+  ScopedByteArrayRO bytes(env, javaBytes);
+  if (bytes.get() == nullptr) {
+    return;
+  }
+  ScopedCharArrayRW chars(env, javaChars);
+  if (chars.get() == nullptr) {
+    return;
+  }
+
+  const jbyte* src = &bytes[offset];
+  jchar* dst = &chars[0];
+  static const jchar REPLACEMENT_CHAR = 0xfffd;
+  for (int i = length - 1; i >= 0; --i) {
+    jchar ch = static_cast<jchar>(*src++ & 0xff);
+    *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
+  }
+}
+
+static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes,
+                                               jint offset, jint length, jcharArray javaChars) {
+  ScopedByteArrayRO bytes(env, javaBytes);
+  if (bytes.get() == nullptr) {
+    return;
+  }
+  ScopedCharArrayRW chars(env, javaChars);
+  if (chars.get() == nullptr) {
+    return;
+  }
+
+  const jbyte* src = &bytes[offset];
+  jchar* dst = &chars[0];
+  for (int i = length - 1; i >= 0; --i) {
+    *dst++ = static_cast<jchar>(*src++ & 0xff);
+  }
+}
+
+/**
+ * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
+ * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
+ * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
+ */
+static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length,
+                               jchar maxValidChar) {
+  ScopedObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(java_string)));
+  if (string.Get() == nullptr) {
+    return nullptr;
+  }
+
+  jbyteArray javaBytes = env->NewByteArray(length);
+  ScopedByteArrayRW bytes(env, javaBytes);
+  if (bytes.get() == nullptr) {
+    return nullptr;
+  }
+
+  const jchar* src = &(string->GetValue()[offset]);
+  jbyte* dst = &bytes[0];
+  for (int i = length - 1; i >= 0; --i) {
+    jchar ch = *src++;
+    if (ch > maxValidChar) {
+      ch = '?';
+    }
+    *dst++ = static_cast<jbyte>(ch);
+  }
+
+  return javaBytes;
+}
+
+static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset,
+                                            jint length) {
+    return charsToBytes(env, java_string, offset, length, 0x7f);
+}
+
+static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string,
+                                                jint offset, jint length) {
+    return charsToBytes(env, java_string, offset, length, 0xff);
+}
+
+static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset,
+                                           jint length) {
+  ScopedObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(java_string)));
+  if (string.Get() == nullptr) {
+    return nullptr;
+  }
+
+  NativeUnsafeByteSequence out(env);
+  if (!out.resize(length)) {
+    return nullptr;
+  }
+
+  const int end = offset + length;
+  for (int i = offset; i < end; ++i) {
+    jint ch = string->CharAt(i);
+    if (ch < 0x80) {
+      // One byte.
+      if (!out.append(ch)) {
+        return nullptr;
+      }
+    } else if (ch < 0x800) {
+      // Two bytes.
+      if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
+        return nullptr;
+      }
+    } else if (U16_IS_SURROGATE(ch)) {
+      // A supplementary character.
+      jchar high = static_cast<jchar>(ch);
+      jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0;
+      if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
+        if (!out.append('?')) {
+          return nullptr;
+        }
+        continue;
+      }
+      // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
+      ++i;
+      ch = U16_GET_SUPPLEMENTARY(high, low);
+      // Four bytes.
+      jbyte b1 = (ch >> 18) | 0xf0;
+      jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
+      jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
+      jbyte b4 = (ch & 0x3f) | 0x80;
+      if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
+        return nullptr;
+      }
+    } else {
+      // Three bytes.
+      jbyte b1 = (ch >> 12) | 0xe0;
+      jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
+      jbyte b3 = (ch & 0x3f) | 0x80;
+      if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
+        return nullptr;
+      }
+    }
+  }
+  return out.toByteArray();
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "!([BII[C)V"),
+  NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "!([BII[C)V"),
+  NATIVE_METHOD(CharsetUtils, toAsciiBytes, "!(Ljava/lang/String;II)[B"),
+  NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "!(Ljava/lang/String;II)[B"),
+  NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "!(Ljava/lang/String;II)[B"),
+};
+
+void register_libcore_util_CharsetUtils(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
+}
+
+}  // namespace art
diff --git a/runtime/native/libcore_util_CharsetUtils.h b/runtime/native/libcore_util_CharsetUtils.h
new file mode 100644
index 0000000..3518bdb
--- /dev/null
+++ b/runtime/native/libcore_util_CharsetUtils.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_LIBCORE_UTIL_CHARSETUTILS_H_
+#define ART_RUNTIME_NATIVE_LIBCORE_UTIL_CHARSETUTILS_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_libcore_util_CharsetUtils(JNIEnv* env);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_LIBCORE_UTIL_CHARSETUTILS_H_
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 3463025..0d39e22 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -56,8 +56,12 @@
   kIntrinsicReferenceGetReferent,
   kIntrinsicCharAt,
   kIntrinsicCompareTo,
+  kIntrinsicGetCharsNoCheck,
   kIntrinsicIsEmptyOrLength,
   kIntrinsicIndexOf,
+  kIntrinsicNewStringFromBytes,
+  kIntrinsicNewStringFromChars,
+  kIntrinsicNewStringFromString,
   kIntrinsicCurrentThread,
   kIntrinsicPeek,
   kIntrinsicPoke,
@@ -71,6 +75,7 @@
   kInlineOpNonWideConst,
   kInlineOpIGet,
   kInlineOpIPut,
+  kInlineStringInit,
 };
 std::ostream& operator<<(std::ostream& os, const InlineMethodOpcode& rhs);
 
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index a2c4c36..88bda3a 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -26,9 +26,9 @@
 // table-lookup read barriers.
 
 #ifdef ART_USE_READ_BARRIER
-// #define USE_BAKER_READ_BARRIER
+#define USE_BAKER_READ_BARRIER
 // #define USE_BROOKS_READ_BARRIER
-#define USE_TABLE_LOOKUP_READ_BARRIER
+// #define USE_TABLE_LOOKUP_READ_BARRIER
 #endif
 
 #if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER)
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index a2ce0cb..329ceb5 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -564,14 +564,21 @@
 
   mirror::Object* receiver = nullptr;
   if (!m->IsStatic()) {
-    // Check that the receiver is non-null and an instance of the field's declaring class.
-    receiver = soa.Decode<mirror::Object*>(javaReceiver);
-    if (!VerifyObjectIsClass(receiver, declaring_class)) {
-      return nullptr;
-    }
+    // Replace calls to String.<init> with equivalent StringFactory call.
+    if (declaring_class->IsStringClass() && m->IsConstructor()) {
+      jmethodID mid = soa.EncodeMethod(m);
+      m = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid));
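+      // The StringFactory method is static and returns the string, so there is no receiver.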
+      CHECK(javaReceiver == nullptr);
+    } else {
+      // Check that the receiver is non-null and an instance of the field's declaring class.
+      receiver = soa.Decode<mirror::Object*>(javaReceiver);
+      if (!VerifyObjectIsClass(receiver, declaring_class)) {
+        return nullptr;
+      }
 
-    // Find the actual implementation of the virtual method.
-    m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m);
+      // Find the actual implementation of the virtual method.
+      m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m);
+    }
   }
 
   // Get our arrays of arguments and their types, and check they're the same size.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 48bca62..2633898 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -91,6 +91,7 @@
 #include "native/java_lang_Object.h"
 #include "native/java_lang_Runtime.h"
 #include "native/java_lang_String.h"
+#include "native/java_lang_StringFactory.h"
 #include "native/java_lang_System.h"
 #include "native/java_lang_Thread.h"
 #include "native/java_lang_Throwable.h"
@@ -103,6 +104,7 @@
 #include "native/java_lang_reflect_Method.h"
 #include "native/java_lang_reflect_Proxy.h"
 #include "native/java_util_concurrent_atomic_AtomicLong.h"
+#include "native/libcore_util_CharsetUtils.h"
 #include "native/org_apache_harmony_dalvik_ddmc_DdmServer.h"
 #include "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 #include "native/sun_misc_Unsafe.h"
@@ -1170,11 +1172,13 @@
   register_java_lang_ref_Reference(env);
   register_java_lang_Runtime(env);
   register_java_lang_String(env);
+  register_java_lang_StringFactory(env);
   register_java_lang_System(env);
   register_java_lang_Thread(env);
   register_java_lang_Throwable(env);
   register_java_lang_VMClassLoader(env);
   register_java_util_concurrent_atomic_AtomicLong(env);
+  register_libcore_util_CharsetUtils(env);
   register_org_apache_harmony_dalvik_ddmc_DdmServer(env);
   register_org_apache_harmony_dalvik_ddmc_DdmVmInternal(env);
   register_sun_misc_Unsafe(env);
@@ -1562,14 +1566,15 @@
   // Throwing an exception may cause its class initialization. If we mark the transaction
   // aborted before that, we may warn with a false alarm. Throwing the exception before
   // marking the transaction aborted avoids that.
-  preinitialization_transaction_->ThrowAbortError(self, false);
+  preinitialization_transaction_->ThrowAbortError(self, &abort_message);
   preinitialization_transaction_->Abort(abort_message);
 }
 
 void Runtime::ThrowTransactionAbortError(Thread* self) {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
-  preinitialization_transaction_->ThrowAbortError(self, true);
+  // Passing nullptr means we rethrow an exception with the earlier transaction abort message.
+  preinitialization_transaction_->ThrowAbortError(self, nullptr);
 }
 
 void Runtime::RecordWriteFieldBoolean(mirror::Object* obj, MemberOffset field_offset,
diff --git a/runtime/runtime.h b/runtime/runtime.h
index c35f4ca..348d5c6 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -31,6 +31,7 @@
 #include "gc_root.h"
 #include "instrumentation.h"
 #include "jobject_comparator.h"
+#include "method_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "profiler_options.h"
@@ -86,6 +87,8 @@
 class Transaction;
 
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
+typedef SafeMap<MethodReference, SafeMap<uint32_t, std::set<uint32_t>>,
+    MethodReferenceComparator> MethodRefToStringInitRegMap;
 
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
 // (no logical reason to do this). You also may not start logging new roots and stop logging new
@@ -558,6 +561,10 @@
     return jit_options_.get();
   }
 
+  MethodRefToStringInitRegMap& GetStringInitMap() {
+    return method_ref_string_init_reg_map_;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -737,6 +744,8 @@
   // zygote.
   uint32_t zygote_max_failed_boots_;
 
+  MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9f7c303..c8aad1b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -105,6 +105,43 @@
                   &tlsPtr_.quick_entrypoints);
 }
 
+void Thread::InitStringEntryPoints() {
+  ScopedObjectAccess soa(this);
+  QuickEntryPoints* qpoints = &tlsPtr_.quick_entrypoints;
+  qpoints->pNewEmptyString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newEmptyString));
+  qpoints->pNewStringFromBytes_B = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_B));
+  qpoints->pNewStringFromBytes_BI = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BI));
+  qpoints->pNewStringFromBytes_BII = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BII));
+  qpoints->pNewStringFromBytes_BIII = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIII));
+  qpoints->pNewStringFromBytes_BIIString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIIString));
+  qpoints->pNewStringFromBytes_BString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BString));
+  qpoints->pNewStringFromBytes_BIICharset = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIICharset));
+  qpoints->pNewStringFromBytes_BCharset = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BCharset));
+  qpoints->pNewStringFromChars_C = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromChars_C));
+  qpoints->pNewStringFromChars_CII = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromChars_CII));
+  qpoints->pNewStringFromChars_IIC = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromChars_IIC));
+  qpoints->pNewStringFromCodePoints = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints));
+  qpoints->pNewStringFromString = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromString));
+  qpoints->pNewStringFromStringBuffer = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuffer));
+  qpoints->pNewStringFromStringBuilder = reinterpret_cast<void(*)()>(
+      soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder));
+}
+
 void Thread::ResetQuickAllocEntryPointsForThread() {
   ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
 }
@@ -163,6 +200,7 @@
   }
   {
     ScopedObjectAccess soa(self);
+    self->InitStringEntryPoints();
 
     // Copy peer into self, deleting global reference when done.
     CHECK(self->tlsPtr_.jpeer != nullptr);
@@ -409,6 +447,8 @@
     }
   }
 
+  self->InitStringEntryPoints();
+
   CHECK_NE(self->GetState(), kRunnable);
   self->SetState(kNative);
 
@@ -1930,6 +1970,9 @@
   QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray)
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck)
+  QUICK_ENTRY_POINT_INFO(pAllocStringFromBytes)
+  QUICK_ENTRY_POINT_INFO(pAllocStringFromChars)
+  QUICK_ENTRY_POINT_INFO(pAllocStringFromString)
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial)
   QUICK_ENTRY_POINT_INFO(pCheckCast)
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage)
@@ -2013,6 +2056,22 @@
   QUICK_ENTRY_POINT_INFO(pDeoptimize)
   QUICK_ENTRY_POINT_INFO(pA64Load)
   QUICK_ENTRY_POINT_INFO(pA64Store)
+  QUICK_ENTRY_POINT_INFO(pNewEmptyString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_B)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BI)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BII)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BIII)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BIIString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BIICharset)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromBytes_BCharset)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromChars_C)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromChars_CII)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromChars_IIC)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromCodePoints)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromString)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
+  QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
diff --git a/runtime/thread.h b/runtime/thread.h
index 35b785d..e766daa 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -541,6 +541,16 @@
   }
 
  public:
+  static uint32_t QuickEntryPointOffsetWithSize(size_t quick_entrypoint_offset,
+                                                size_t pointer_size) {
+    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
+    if (pointer_size == 4) {
+      return QuickEntryPointOffset<4>(quick_entrypoint_offset).Uint32Value();
+    } else {
+      return QuickEntryPointOffset<8>(quick_entrypoint_offset).Uint32Value();
+    }
+  }
+
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> QuickEntryPointOffset(size_t quick_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
@@ -911,6 +921,8 @@
   void PushVerifier(verifier::MethodVerifier* verifier);
   void PopVerifier(verifier::MethodVerifier* verifier);
 
+  void InitStringEntryPoints();
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index cc0f15f..ab821d7 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -70,13 +70,21 @@
   }
 }
 
-void Transaction::ThrowAbortError(Thread* self, bool rethrow) {
+void Transaction::ThrowAbortError(Thread* self, const std::string* abort_message) {
+  const bool rethrow = (abort_message == nullptr);
   if (kIsDebugBuild && rethrow) {
     CHECK(IsAborted()) << "Rethrow " << Transaction::kAbortExceptionDescriptor
                        << " while transaction is not aborted";
   }
-  std::string abort_msg(GetAbortMessage());
-  self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature, abort_msg.c_str());
+  if (rethrow) {
+    // Rethrow an exception with the earlier abort message stored in the transaction.
+    self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature,
+                                   GetAbortMessage().c_str());
+  } else {
+    // Throw an exception with the given abort message.
+    self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature,
+                                   abort_message->c_str());
+  }
 }
 
 bool Transaction::IsAborted() {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 4d85662..030478c 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -48,7 +48,7 @@
   void Abort(const std::string& abort_message)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void ThrowAbortError(Thread* self, bool rethrow)
+  void ThrowAbortError(Thread* self, const std::string* abort_message)
       LOCKS_EXCLUDED(log_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsAborted() LOCKS_EXCLUDED(log_lock_);
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 3d13c3e..10600e2 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -107,15 +107,6 @@
   }
 }
 
-int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset,
-                         size_t char_count) {
-  uint32_t hash = 0;
-  for (size_t i = 0; i < char_count; i++) {
-    hash = hash * 31 + chars->Get(offset + i);
-  }
-  return static_cast<int32_t>(hash);
-}
-
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) {
   uint32_t hash = 0;
   while (char_count--) {
diff --git a/runtime/utils.h b/runtime/utils.h
index 853fa08..eaafcf0 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -300,6 +300,18 @@
   return CTZ(x);
 }
 
+// Return whether x / divisor == x * (1.0f / divisor), for every float x.
+static constexpr bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
+  // True, if the most significant bits of divisor are 0.
+  return ((divisor & 0x7fffff) == 0);
+}
+
+// Return whether x / divisor == x * (1.0 / divisor), for every double x.
+static constexpr bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
+  // True, if the most significant bits of divisor are 0.
+  return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
+}
+
 template<typename T>
 static constexpr int POPCOUNT(T x) {
   return (sizeof(T) == sizeof(uint32_t))
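A minimal Java sketch of the condition the two helpers above encode, under the assumption (suggested by how the optimizing compiler passes constants) that the integer argument is the raw IEEE-754 bit pattern of the divisor: a zero fraction field means the divisor is a power of two, so multiplying by its reciprocal rounds identically to dividing.

    // Illustrative check of the power-of-two-divisor condition (Java analogue).
    public class ReciprocalDivideCheck {
      // Mirrors CanDivideByReciprocalMultiplyFloat: the low 23 bits are the float fraction.
      static boolean canUseReciprocalFloat(float divisor) {
        return (Float.floatToRawIntBits(divisor) & 0x7fffff) == 0;
      }

      public static void main(String[] args) {
        System.out.println(canUseReciprocalFloat(8.0f));    // true: 1.0f / 8.0f is exact
        System.out.println(canUseReciprocalFloat(3.0f));    // false: 1.0f / 3.0f rounds
        float x = 5.0f;
        System.out.println(x / 8.0f == x * (1.0f / 8.0f));  // true for every float x
      }
    }
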
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index d8f8950..259fe33 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -151,9 +151,6 @@
   f = java_lang_String->FindDeclaredInstanceField("count", "I");
   EXPECT_EQ("int java.lang.String.count", PrettyField(f));
   EXPECT_EQ("java.lang.String.count", PrettyField(f, false));
-  f = java_lang_String->FindDeclaredInstanceField("value", "[C");
-  EXPECT_EQ("char[] java.lang.String.value", PrettyField(f));
-  EXPECT_EQ("java.lang.String.value", PrettyField(f, false));
 }
 
 TEST_F(UtilsTest, PrettySize) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 065df05..475fe8b 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -516,6 +516,23 @@
   return GetQuickInvokedMethod(inst, register_line, is_range, false);
 }
 
+SafeMap<uint32_t, std::set<uint32_t>> MethodVerifier::FindStringInitMap(mirror::ArtMethod* m) {
+  Thread* self = Thread::Current();
+  StackHandleScope<3> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
+  Handle<mirror::ArtMethod> method(hs.NewHandle(m));
+  MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
+                          m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(),
+                          true, true, false, true);
+  return verifier.FindStringInitMap();
+}
+
+SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() {
+  Verify();
+  return GetStringInitPcRegMap();
+}
+
 bool MethodVerifier::Verify() {
   // If there aren't any instructions, make sure that's expected, then exit successfully.
   if (code_item_ == nullptr) {
@@ -2445,7 +2462,8 @@
          * Replace the uninitialized reference with an initialized one. We need to do this for all
          * registers that have the same object instance in them, not just the "this" register.
          */
-        work_line_->MarkRefsAsInitialized(this, this_type);
+        const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+        work_line_->MarkRefsAsInitialized(this, this_type, this_reg, work_insn_idx_);
       }
       if (return_type == nullptr) {
         return_type = &reg_types_.FromDescriptor(GetClassLoader(), return_type_descriptor,
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 2914b7c..452d1dd 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -199,6 +199,9 @@
   static mirror::ArtMethod* FindInvokedMethodAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static SafeMap<uint32_t, std::set<uint32_t>> FindStringInitMap(mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void Init() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void Shutdown();
 
@@ -263,6 +266,10 @@
     return (method_access_flags_ & kAccStatic) != 0;
   }
 
+  SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() {
+    return string_init_pc_reg_map_;
+  }
+
  private:
   // Private constructor for dumping.
   MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
@@ -307,6 +314,9 @@
   mirror::ArtMethod* FindInvokedMethodAtDexPc(uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  SafeMap<uint32_t, std::set<uint32_t>>& FindStringInitMap()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   /*
    * Compute the width of the instruction at each address in the instruction stream, and store it in
    * insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
@@ -743,6 +753,12 @@
   MethodVerifier* link_;
 
   friend class art::Thread;
+
+  // Map of dex pcs of invocations of java.lang.String.<init> to the set of other registers that
+  // contain the uninitialized this pointer to that invoke. Will contain no entry if there are
+  // no other registers.
+  SafeMap<uint32_t, std::set<uint32_t>> string_init_pc_reg_map_;
+
   DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index ed588fc..2838681 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -127,14 +127,25 @@
   return true;
 }
 
-void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type) {
+void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
+                                         uint32_t this_reg, uint32_t dex_pc) {
   DCHECK(uninit_type.IsUninitializedTypes());
+  bool is_string = !uninit_type.IsUnresolvedTypes() && uninit_type.GetClass()->IsStringClass();
   const RegType& init_type = verifier->GetRegTypeCache()->FromUninitialized(uninit_type);
   size_t changed = 0;
   for (uint32_t i = 0; i < num_regs_; i++) {
     if (GetRegisterType(verifier, i).Equals(uninit_type)) {
       line_[i] = init_type.GetId();
       changed++;
+      if (is_string && i != this_reg) {
+        auto it = verifier->GetStringInitPcRegMap().find(dex_pc);
+        if (it != verifier->GetStringInitPcRegMap().end()) {
+          it->second.insert(i);
+        } else {
+          std::set<uint32_t> reg_set = { i };
+          verifier->GetStringInitPcRegMap().Put(dex_pc, reg_set);
+        }
+      }
     }
   }
   DCHECK_GT(changed, 0u);
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 376dbf1..0de0d9c 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -138,7 +138,8 @@
    * reference type. This is called when an appropriate constructor is invoked -- all copies of
    * the reference must be marked as initialized.
    */
-  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type)
+  void MarkRefsAsInitialized(MethodVerifier* verifier, const RegType& uninit_type,
+                             uint32_t this_reg, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index a2d0427..2843806 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -49,6 +49,7 @@
 jclass WellKnownClasses::java_lang_RuntimeException;
 jclass WellKnownClasses::java_lang_StackOverflowError;
 jclass WellKnownClasses::java_lang_String;
+jclass WellKnownClasses::java_lang_StringFactory;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
 jclass WellKnownClasses::java_lang_Thread__UncaughtExceptionHandler;
@@ -79,6 +80,38 @@
 jmethodID WellKnownClasses::java_lang_reflect_Proxy_invoke;
 jmethodID WellKnownClasses::java_lang_Runtime_nativeLoad;
 jmethodID WellKnownClasses::java_lang_Short_valueOf;
+jmethodID WellKnownClasses::java_lang_String_init;
+jmethodID WellKnownClasses::java_lang_String_init_B;
+jmethodID WellKnownClasses::java_lang_String_init_BI;
+jmethodID WellKnownClasses::java_lang_String_init_BII;
+jmethodID WellKnownClasses::java_lang_String_init_BIII;
+jmethodID WellKnownClasses::java_lang_String_init_BIIString;
+jmethodID WellKnownClasses::java_lang_String_init_BString;
+jmethodID WellKnownClasses::java_lang_String_init_BIICharset;
+jmethodID WellKnownClasses::java_lang_String_init_BCharset;
+jmethodID WellKnownClasses::java_lang_String_init_C;
+jmethodID WellKnownClasses::java_lang_String_init_CII;
+jmethodID WellKnownClasses::java_lang_String_init_IIC;
+jmethodID WellKnownClasses::java_lang_String_init_String;
+jmethodID WellKnownClasses::java_lang_String_init_StringBuffer;
+jmethodID WellKnownClasses::java_lang_String_init_III;
+jmethodID WellKnownClasses::java_lang_String_init_StringBuilder;
+jmethodID WellKnownClasses::java_lang_StringFactory_newEmptyString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_B;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BI;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BII;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIII;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIIString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BIICharset;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromBytes_BCharset;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromChars_C;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromChars_CII;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromChars_IIC;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromString;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuffer;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromCodePoints;
+jmethodID WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder;
 jmethodID WellKnownClasses::java_lang_System_runFinalization = nullptr;
 jmethodID WellKnownClasses::java_lang_Thread_init;
 jmethodID WellKnownClasses::java_lang_Thread_run;
@@ -188,6 +221,7 @@
   java_lang_RuntimeException = CacheClass(env, "java/lang/RuntimeException");
   java_lang_StackOverflowError = CacheClass(env, "java/lang/StackOverflowError");
   java_lang_String = CacheClass(env, "java/lang/String");
+  java_lang_StringFactory = CacheClass(env, "java/lang/StringFactory");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
   java_lang_Thread__UncaughtExceptionHandler = CacheClass(env,
@@ -223,6 +257,62 @@
   org_apache_harmony_dalvik_ddmc_DdmServer_broadcast = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "broadcast", "(I)V");
   org_apache_harmony_dalvik_ddmc_DdmServer_dispatch = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "dispatch", "(I[BII)Lorg/apache/harmony/dalvik/ddmc/Chunk;");
 
+  java_lang_String_init = CacheMethod(env, java_lang_String, false, "<init>", "()V");
+  java_lang_String_init_B = CacheMethod(env, java_lang_String, false, "<init>", "([B)V");
+  java_lang_String_init_BI = CacheMethod(env, java_lang_String, false, "<init>", "([BI)V");
+  java_lang_String_init_BII = CacheMethod(env, java_lang_String, false, "<init>", "([BII)V");
+  java_lang_String_init_BIII = CacheMethod(env, java_lang_String, false, "<init>", "([BIII)V");
+  java_lang_String_init_BIIString = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BIILjava/lang/String;)V");
+  java_lang_String_init_BString = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BLjava/lang/String;)V");
+  java_lang_String_init_BIICharset = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BIILjava/nio/charset/Charset;)V");
+  java_lang_String_init_BCharset = CacheMethod(env, java_lang_String, false, "<init>",
+      "([BLjava/nio/charset/Charset;)V");
+  java_lang_String_init_C = CacheMethod(env, java_lang_String, false, "<init>", "([C)V");
+  java_lang_String_init_CII = CacheMethod(env, java_lang_String, false, "<init>", "([CII)V");
+  java_lang_String_init_IIC = CacheMethod(env, java_lang_String, false, "<init>", "(II[C)V");
+  java_lang_String_init_String = CacheMethod(env, java_lang_String, false, "<init>",
+      "(Ljava/lang/String;)V");
+  java_lang_String_init_StringBuffer = CacheMethod(env, java_lang_String, false, "<init>",
+      "(Ljava/lang/StringBuffer;)V");
+  java_lang_String_init_III = CacheMethod(env, java_lang_String, false, "<init>", "([III)V");
+  java_lang_String_init_StringBuilder = CacheMethod(env, java_lang_String, false, "<init>",
+       "(Ljava/lang/StringBuilder;)V");
+  java_lang_StringFactory_newEmptyString = CacheMethod(env, java_lang_StringFactory, true,
+       "newEmptyString", "()Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_B = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([B)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BI = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([BI)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BII = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([BII)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BIII = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromBytes", "([BIII)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BIIString = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BIILjava/lang/String;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BString = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BLjava/lang/String;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BIICharset = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BIILjava/nio/charset/Charset;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromBytes_BCharset = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromBytes", "([BLjava/nio/charset/Charset;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromChars_C = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromChars", "([C)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromChars_CII = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromChars", "([CII)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromChars_IIC = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromChars", "(II[C)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromString = CacheMethod(env, java_lang_StringFactory, true,
+       "newStringFromString", "(Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromStringBuffer = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromStringBuffer", "(Ljava/lang/StringBuffer;)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromCodePoints = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromCodePoints", "([III)Ljava/lang/String;");
+  java_lang_StringFactory_newStringFromStringBuilder = CacheMethod(env, java_lang_StringFactory,
+       true, "newStringFromStringBuilder", "(Ljava/lang/StringBuilder;)Ljava/lang/String;");
+
   dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "Ljava/lang/Object;");
   dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
@@ -265,6 +355,8 @@
   java_lang_Integer_valueOf = CachePrimitiveBoxingMethod(env, 'I', "java/lang/Integer");
   java_lang_Long_valueOf = CachePrimitiveBoxingMethod(env, 'J', "java/lang/Long");
   java_lang_Short_valueOf = CachePrimitiveBoxingMethod(env, 'S', "java/lang/Short");
+
+  Thread::Current()->InitStringEntryPoints();
 }
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
@@ -276,4 +368,43 @@
   return reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(global_jclass));
 }
 
+jmethodID WellKnownClasses::StringInitToStringFactoryMethodID(jmethodID string_init) {
+  // TODO: Prioritize ordering.
+  if (string_init == java_lang_String_init) {
+    return java_lang_StringFactory_newEmptyString;
+  } else if (string_init == java_lang_String_init_B) {
+    return java_lang_StringFactory_newStringFromBytes_B;
+  } else if (string_init == java_lang_String_init_BI) {
+    return java_lang_StringFactory_newStringFromBytes_BI;
+  } else if (string_init == java_lang_String_init_BII) {
+    return java_lang_StringFactory_newStringFromBytes_BII;
+  } else if (string_init == java_lang_String_init_BIII) {
+    return java_lang_StringFactory_newStringFromBytes_BIII;
+  } else if (string_init == java_lang_String_init_BIIString) {
+    return java_lang_StringFactory_newStringFromBytes_BIIString;
+  } else if (string_init == java_lang_String_init_BString) {
+    return java_lang_StringFactory_newStringFromBytes_BString;
+  } else if (string_init == java_lang_String_init_BIICharset) {
+    return java_lang_StringFactory_newStringFromBytes_BIICharset;
+  } else if (string_init == java_lang_String_init_BCharset) {
+    return java_lang_StringFactory_newStringFromBytes_BCharset;
+  } else if (string_init == java_lang_String_init_C) {
+    return java_lang_StringFactory_newStringFromChars_C;
+  } else if (string_init == java_lang_String_init_CII) {
+    return java_lang_StringFactory_newStringFromChars_CII;
+  } else if (string_init == java_lang_String_init_IIC) {
+    return java_lang_StringFactory_newStringFromChars_IIC;
+  } else if (string_init == java_lang_String_init_String) {
+    return java_lang_StringFactory_newStringFromString;
+  } else if (string_init == java_lang_String_init_StringBuffer) {
+    return java_lang_StringFactory_newStringFromStringBuffer;
+  } else if (string_init == java_lang_String_init_III) {
+    return java_lang_StringFactory_newStringFromCodePoints;
+  } else if (string_init == java_lang_String_init_StringBuilder) {
+    return java_lang_StringFactory_newStringFromStringBuilder;
+  }
+  LOG(FATAL) << "Could not find StringFactory method for String.<init>";
+  return nullptr;
+}
+
 }  // namespace art
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index cef9d55..acb2656 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -35,6 +35,7 @@
  public:
   static void Init(JNIEnv* env);  // Run before native methods are registered.
   static void LateInit(JNIEnv* env);  // Run after native methods are registered.
+  static jmethodID StringInitToStringFactoryMethodID(jmethodID string_init);
 
   static mirror::Class* ToClass(jclass global_jclass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -60,6 +61,7 @@
   static jclass java_lang_RuntimeException;
   static jclass java_lang_StackOverflowError;
   static jclass java_lang_String;
+  static jclass java_lang_StringFactory;
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
@@ -90,6 +92,38 @@
   static jmethodID java_lang_reflect_Proxy_invoke;
   static jmethodID java_lang_Runtime_nativeLoad;
   static jmethodID java_lang_Short_valueOf;
+  static jmethodID java_lang_String_init;
+  static jmethodID java_lang_String_init_B;
+  static jmethodID java_lang_String_init_BI;
+  static jmethodID java_lang_String_init_BII;
+  static jmethodID java_lang_String_init_BIII;
+  static jmethodID java_lang_String_init_BIIString;
+  static jmethodID java_lang_String_init_BString;
+  static jmethodID java_lang_String_init_BIICharset;
+  static jmethodID java_lang_String_init_BCharset;
+  static jmethodID java_lang_String_init_C;
+  static jmethodID java_lang_String_init_CII;
+  static jmethodID java_lang_String_init_IIC;
+  static jmethodID java_lang_String_init_String;
+  static jmethodID java_lang_String_init_StringBuffer;
+  static jmethodID java_lang_String_init_III;
+  static jmethodID java_lang_String_init_StringBuilder;
+  static jmethodID java_lang_StringFactory_newEmptyString;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_B;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BI;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BII;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BIII;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BIIString;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BString;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BIICharset;
+  static jmethodID java_lang_StringFactory_newStringFromBytes_BCharset;
+  static jmethodID java_lang_StringFactory_newStringFromChars_C;
+  static jmethodID java_lang_StringFactory_newStringFromChars_CII;
+  static jmethodID java_lang_StringFactory_newStringFromChars_IIC;
+  static jmethodID java_lang_StringFactory_newStringFromString;
+  static jmethodID java_lang_StringFactory_newStringFromStringBuffer;
+  static jmethodID java_lang_StringFactory_newStringFromCodePoints;
+  static jmethodID java_lang_StringFactory_newStringFromStringBuilder;
   static jmethodID java_lang_System_runFinalization;
   static jmethodID java_lang_Thread_init;
   static jmethodID java_lang_Thread_run;
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index b23b97b..cdc5461 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -548,3 +548,23 @@
 extern "C" void JNICALL Java_Main_testCallNonvirtual(JNIEnv* env, jclass) {
   JniCallNonvirtualVoidMethodTest(env).Test();
 }
+
+extern "C" JNIEXPORT void JNICALL Java_Main_testNewStringObject(JNIEnv* env, jclass) {
+  const char* string = "Test";
+  int length = strlen(string);
+  jclass c = env->FindClass("java/lang/String");
+  assert(c != NULL);
+  jmethodID method = env->GetMethodID(c, "<init>", "([B)V");
+  assert(method != NULL);
+  assert(!env->ExceptionCheck());
+  jbyteArray array = env->NewByteArray(length);
+  env->SetByteArrayRegion(array, 0, length, reinterpret_cast<const jbyte*>(string));
+  jobject o = env->NewObject(c, method, array);
+  assert(o != NULL);
+  jstring s = reinterpret_cast<jstring>(o);
+  assert(env->GetStringLength(s) == length);
+  assert(env->GetStringUTFLength(s) == length);
+  const char* chars = env->GetStringUTFChars(s, nullptr);
+  assert(strcmp(string, chars) == 0);
+  env->ReleaseStringUTFChars(s, chars);
+}
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index 8e92010..584fae3 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -33,6 +33,7 @@
         testShallowGetCallingClassLoader();
         testShallowGetStackClass2();
         testCallNonvirtual();
+        testNewStringObject();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -184,6 +185,8 @@
     private static native void nativeTestShallowGetStackClass2();
 
     private static native void testCallNonvirtual();
+
+    private static native void testNewStringObject();
 }
 
 class JniCallNonvirtualTest {
diff --git a/test/021-string2/expected.txt b/test/021-string2/expected.txt
index bd7f049..a9c6eb8 100644
--- a/test/021-string2/expected.txt
+++ b/test/021-string2/expected.txt
@@ -1 +1,2 @@
 Got expected npe
+OK
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0239a3c..0226614 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -15,12 +15,13 @@
  */
 
 import junit.framework.Assert;
+import java.lang.reflect.Method;
 
 /**
  * more string tests
  */
 public class Main {
-    public static void main(String args[]) {
+    public static void main(String args[]) throws Exception {
         String test = "0123456789";
         String test1 = new String("0123456789");    // different object
         String test2 = new String("0123456780");    // different value
@@ -83,5 +84,10 @@
 
         Assert.assertEquals("this is a path", test.replaceAll("/", " "));
         Assert.assertEquals("this is a path", test.replace("/", " "));
+
+        Class Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
+        Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
+        String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
+        System.out.println(result);
     }
 }
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index 962bd7f..f41ff2a 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -44,7 +44,7 @@
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
         for (int i = 0; i < overflowAllocations; i++) {
-            new String("fnord");
+            new Object();
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index 1f8df1d..7db61a1 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -32,8 +32,8 @@
 62 (class java.lang.Long)
 14 (class java.lang.Short)
 [public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private final int java.lang.String.offset, private final char[] java.lang.String.value, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
-[void java.lang.String._getChars(int,int,char[],int), public char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int 
java.lang.String.fastIndexOf(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
+[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
+[public native char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), native void java.lang.String.getCharsNoCheck(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), native void java.lang.String.setCharAt(int,char), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public native [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private 
java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index 16a71e4..deb70ba 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -4,7 +4,7 @@
 Ready for native bridge tests.
 Checking for support.
 Getting trampoline for JNI_OnLoad with shorty (null).
-Test ART callbacks: all JNI function number is 9.
+Test ART callbacks: all JNI function number is 10.
     name:booleanMethod, signature:(ZZZZZZZZZZ)Z, shorty:ZZZZZZZZZZZ.
     name:byteMethod, signature:(BBBBBBBBBB)B, shorty:BBBBBBBBBBB.
     name:charMethod, signature:(CCCCCCCCCC)C, shorty:CCCCCCCCCCC.
@@ -13,6 +13,7 @@
     name:testFindClassOnAttachedNativeThread, signature:()V, shorty:V.
     name:testFindFieldOnAttachedNativeThreadNative, signature:()V, shorty:V.
     name:testGetMirandaMethodNative, signature:()Ljava/lang/reflect/Method;, shorty:L.
+    name:testNewStringObject, signature:()V, shorty:V.
     name:testZeroLengthByteBuffers, signature:()V, shorty:V.
 trampoline_JNI_OnLoad called!
 Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V.
@@ -55,3 +56,5 @@
 trampoline_Java_Main_charMethod called!
 trampoline_Java_Main_charMethod called!
 trampoline_Java_Main_charMethod called!
+Getting trampoline for Java_Main_testNewStringObject with shorty V.
+trampoline_Java_Main_testNewStringObject called!
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 6bcc1f5..24e9600 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -122,6 +122,14 @@
   return fnPtr(env, klass);
 }
 
+static void trampoline_Java_Main_testNewStringObject(JNIEnv* env, jclass klass) {
+  typedef void (*FnPtr_t)(JNIEnv*, jclass);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+    (find_native_bridge_method("testNewStringObject")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass);
+}
+
 static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass klass) {
   typedef void (*FnPtr_t)(JNIEnv*, jclass);
   FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
@@ -190,6 +198,8 @@
     reinterpret_cast<void*>(trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative) },
   { "testGetMirandaMethodNative", "()Ljava/lang/reflect/Method;", true, nullptr,
     reinterpret_cast<void*>(trampoline_Java_Main_testGetMirandaMethodNative) },
+  { "testNewStringObject", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testNewStringObject) },
   { "testZeroLengthByteBuffers", "()V", true, nullptr,
     reinterpret_cast<void*>(trampoline_Java_Main_testZeroLengthByteBuffers) },
 };
diff --git a/test/115-native-bridge/src/NativeBridgeMain.java b/test/115-native-bridge/src/NativeBridgeMain.java
index 2405627..c843707 100644
--- a/test/115-native-bridge/src/NativeBridgeMain.java
+++ b/test/115-native-bridge/src/NativeBridgeMain.java
@@ -31,6 +31,7 @@
         testBooleanMethod();
         testCharMethod();
         testEnvironment();
+        testNewStringObject();
     }
 
     public static native void testFindClassOnAttachedNativeThread();
@@ -167,6 +168,8 @@
       //   throw new AssertionError("unexpected value for supported_abis");
       // }
     }
+
+    private static native void testNewStringObject();
 }
 
 public class NativeBridgeMain {
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index 24ee6e0..52d4259 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -358,7 +358,8 @@
       field.set(new A(), 5);
       fail();
     } catch (IllegalArgumentException expected) {
-      assertEquals("field A.b has type java.lang.String, got java.lang.Integer", expected.getMessage());
+      assertEquals("field A.b has type java.lang.String, got java.lang.Integer",
+          expected.getMessage());
     }
 
     // Can't unbox null to a primitive.
@@ -385,7 +386,8 @@
       m.invoke(new A(), 2, 2);
       fail();
     } catch (IllegalArgumentException expected) {
-      assertEquals("method A.m argument 2 has type java.lang.String, got java.lang.Integer", expected.getMessage());
+      assertEquals("method A.m argument 2 has type java.lang.String, got java.lang.Integer",
+          expected.getMessage());
     }
 
     // Can't pass null as an int.
@@ -409,21 +411,24 @@
       m.invoke("hello", "world"); // Wrong type.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("method java.lang.String.charAt argument 1 has type int, got java.lang.String", iae.getMessage());
+      assertEquals("method java.lang.String.charAt! argument 1 has type int, got java.lang.String",
+          iae.getMessage());
     }
     try {
       Method m = String.class.getMethod("charAt", int.class);
       m.invoke("hello", (Object) null); // Null for a primitive argument.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("method java.lang.String.charAt argument 1 has type int, got null", iae.getMessage());
+      assertEquals("method java.lang.String.charAt! argument 1 has type int, got null",
+          iae.getMessage());
     }
     try {
       Method m = String.class.getMethod("charAt", int.class);
       m.invoke(new Integer(5)); // Wrong type for 'this'.
       fail();
     } catch (IllegalArgumentException iae) {
-      assertEquals("Expected receiver of type java.lang.String, but got java.lang.Integer", iae.getMessage());
+      assertEquals("Expected receiver of type java.lang.String, but got java.lang.Integer",
+          iae.getMessage());
     }
     try {
       Method m = String.class.getMethod("charAt", int.class);
diff --git a/test/449-checker-bce/expected.txt b/test/449-checker-bce/expected.txt
index 29d6383..e69de29 100644
--- a/test/449-checker-bce/expected.txt
+++ b/test/449-checker-bce/expected.txt
@@ -1 +0,0 @@
-100
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 17039a3..f90d85d 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -608,6 +608,380 @@
   }
 
 
+  int sum;
+
+  // CHECK-START: void Main.foo1(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo1(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo1(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i < end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo2(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo2(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo2(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i <= end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo3(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo3(int[], int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo3(int[] array, int end) {
+    // Two HDeoptimize will be added. One for end < array.length,
+    // and one for null check on array (to hoist null check
+    // and array.length out of loop).
+    for (int i = 3 ; i <= end; i++) {
+      array[i] = 1;
+      sum += array[i];
+    }
+  }
+
+  // CHECK-START: void Main.foo4(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo4(int[], int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo4(int[] array, int end) {
+    // Two HDeoptimize will be added. One for end <= array.length,
+    // and one for null check on array (to hoist null check
+    // and array.length out of loop).
+    for (int i = end ; i > 0; i--) {
+      array[i - 1] = 1;
+      sum += array[i - 1];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo5(int[], int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo5(int[], int) BCE (after)
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo5(int[] array, int end) {
+    // The bounds check in this loop can be eliminated without deoptimization.
+    for (int i = array.length - 1 ; i >= 0; i--) {
+      array[i] = 1;
+    }
+    // One HDeoptimize will be added.
+    // It's for (end - 2 <= array.length - 2).
+    for (int i = end - 2 ; i > 0; i--) {
+      sum += array[i - 1];
+      sum += array[i];
+      sum += array[i + 1];
+    }
+  }
+
+
+  // CHECK-START: void Main.foo6(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+
+  // CHECK-START: void Main.foo6(int[], int, int) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArraySet
+
+  void foo6(int[] array, int start, int end) {
+    // Three HDeoptimize will be added. One for
+    // start >= 2, one for end <= array.length - 3,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = end; i >= start; i--) {
+      array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5;
+    }
+  }
+
+
+  // CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+
+  // CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (after)
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK: Deoptimize
+  // CHECK-NOT: Deoptimize
+  // CHECK: Phi
+  // CHECK: BoundsCheck
+  // CHECK: ArrayGet
+  // CHECK-NOT: BoundsCheck
+  // CHECK: ArrayGet
+
+  void foo7(int[] array, int start, int end, boolean lowEnd) {
+    // Three HDeoptimize will be added. One for
+    // start >= 0, one for end <= array.length,
+    // and one for null check on array (to hoist null
+    // check and array.length out of loop).
+    for (int i = start ; i < end; i++) {
+      if (lowEnd) {
+        // This array access is not guaranteed to execute, so we
+        // don't use the +1000 offset when forming the deoptimization
+        // conditions.
+        sum += array[i + 1000];
+      }
+      sum += array[i];
+    }
+  }
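The conditional access matters for the speculation, as sketched below: had it been folded into the hoisted conditions, the guard would need to cover index i + 1000 as well, and ordinary callers could never satisfy it. A hypothetical illustration (the guard mentioned in the comment is not something ART emits):

    // If the conditional access were used for the hoisted guards, a condition
    // like (end + 1000 <= array.length) would be required, and this perfectly
    // valid call would needlessly deoptimize to the slower checked path even
    // though array[i + 1000] is never touched when lowEnd is false.
    new Main().foo7(new int[10], 0, 10, false);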
+
+
+  // CHECK-START: void Main.partialLooping(int[], int, int) BCE (before)
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+
+  // CHECK-START: void Main.partialLooping(int[], int, int) BCE (after)
+  // CHECK-NOT: Deoptimize
+  // CHECK: BoundsCheck
+  // CHECK: ArraySet
+
+  void partialLooping(int[] array, int start, int end) {
+    // This loop doesn't cover the full range of [start, end), so
+    // adding deoptimization would be too aggressive: end can be
+    // greater than array.length, yet the loop never works on more
+    // than two elements.
+    for (int i = start; i < end; i++) {
+      if (i == 2) {
+        return;
+      }
+      array[i] = 1;
+    }
+  }
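A concrete call makes that trade-off visible: with an array of length 3 and end == 4, a hoisted guard of the form end <= array.length would fail and force a needless deoptimization, even though the loop exits at i == 2 and never indexes past element 1. Illustrative snippet only:

    int[] a = new int[3];
    new Main().partialLooping(a, 0, 4);  // returns normally; only a[0] and a[1] are written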
+
+
+  static void testUnknownBounds() {
+    boolean caught = false;
+    Main main = new Main();
+    main.foo1(new int[10], 0, 10);
+    if (main.sum != 10) {
+      System.out.println("foo1 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo1(new int[10], 0, 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 10) {
+      System.out.println("foo1 exception failed!");
+    }
+
+    main = new Main();
+    main.foo2(new int[10], 0, 9);
+    if (main.sum != 10) {
+      System.out.println("foo2 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo2(new int[10], 0, 10);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 10) {
+      System.out.println("foo2 exception failed!");
+    }
+
+    main = new Main();
+    main.foo3(new int[10], 9);
+    if (main.sum != 7) {
+      System.out.println("foo3 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo3(new int[10], 10);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 7) {
+      System.out.println("foo3 exception failed!");
+    }
+
+    main = new Main();
+    main.foo4(new int[10], 10);
+    if (main.sum != 10) {
+      System.out.println("foo4 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo4(new int[10], 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 0) {
+      System.out.println("foo4 exception failed!");
+    }
+
+    main = new Main();
+    main.foo5(new int[10], 10);
+    if (main.sum != 24) {
+      System.out.println("foo5 failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo5(new int[10], 11);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || main.sum != 2) {
+      System.out.println("foo5 exception failed!");
+    }
+
+    main = new Main();
+    main.foo6(new int[10], 2, 7);
+
+    main = new Main();
+    int[] array = new int[4];
+    main.partialLooping(array, 0, 5);
+    if ((array[0] != 1) || (array[1] != 1) ||
+        (array[2] != 0) || (array[3] != 0)) {
+      System.out.println("partialLooping failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo6(new int[10], 2, 8);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("foo6 exception failed!");
+    }
+
+    caught = false;
+    main = new Main();
+    try {
+      main.foo6(new int[10], 1, 7);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("foo6 exception failed!");
+    }
+
+  }
+
   // Make sure this method is compiled with optimizing.
   // CHECK-START: void Main.main(java.lang.String[]) register (after)
   // CHECK: ParallelMove
@@ -643,7 +1017,11 @@
 
     // Make sure this value is kept after deoptimization.
     int i = 1;
-    System.out.println(foo() + i);
+    if (foo() + i != 100) {
+      System.out.println("foo failed!");
+    }
+
+    testUnknownBounds();
   }
 
 }
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index f0578ef..5d5a6b3 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -34,6 +34,18 @@
     }
   }
 
+  public static void assertFloatEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertDoubleEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /**
    * Tiny programs exercising optimizations of arithmetic identities.
    */
@@ -918,8 +930,86 @@
   // CHECK:                            BooleanNot
   // CHECK-NOT:                        BooleanNot
 
+  public static boolean NegateValue(boolean arg) {
+    return !arg;
+  }
+
   public static boolean NotNotBool(boolean arg) {
-    return !(!arg);
+    return !(NegateValue(arg));
+  }
+
+  // CHECK-START: float Main.Div2(float) instruction_simplifier (before)
+  // CHECK-DAG:      [[Arg:f\d+]]      ParameterValue
+  // CHECK-DAG:      [[Const2:f\d+]]   FloatConstant 2
+  // CHECK-DAG:      [[Div:f\d+]]      Div [ [[Arg]] [[Const2]] ]
+  // CHECK-DAG:                        Return [ [[Div]] ]
+
+  // CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  // CHECK-DAG:      [[Arg:f\d+]]      ParameterValue
+  // CHECK-DAG:      [[ConstP5:f\d+]]  FloatConstant 0.5
+  // CHECK-DAG:      [[Mul:f\d+]]      Mul [ [[Arg]] [[ConstP5]] ]
+  // CHECK-DAG:                        Return [ [[Mul]] ]
+
+  // CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+
+  public static float Div2(float arg) {
+    return arg / 2.0f;
+  }
+
+  // CHECK-START: double Main.Div2(double) instruction_simplifier (before)
+  // CHECK-DAG:      [[Arg:d\d+]]      ParameterValue
+  // CHECK-DAG:      [[Const2:d\d+]]   DoubleConstant 2
+  // CHECK-DAG:      [[Div:d\d+]]      Div [ [[Arg]] [[Const2]] ]
+  // CHECK-DAG:                        Return [ [[Div]] ]
+
+  // CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  // CHECK-DAG:      [[Arg:d\d+]]      ParameterValue
+  // CHECK-DAG:      [[ConstP5:d\d+]]  DoubleConstant 0.5
+  // CHECK-DAG:      [[Mul:d\d+]]      Mul [ [[Arg]] [[ConstP5]] ]
+  // CHECK-DAG:                        Return [ [[Mul]] ]
+
+  // CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+  public static double Div2(double arg) {
+    return arg / 2.0;
+  }
+
+  // CHECK-START: float Main.DivMP25(float) instruction_simplifier (before)
+  // CHECK-DAG:      [[Arg:f\d+]]      ParameterValue
+  // CHECK-DAG:      [[ConstMP25:f\d+]]   FloatConstant -0.25
+  // CHECK-DAG:      [[Div:f\d+]]      Div [ [[Arg]] [[ConstMP25]] ]
+  // CHECK-DAG:                        Return [ [[Div]] ]
+
+  // CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  // CHECK-DAG:      [[Arg:f\d+]]      ParameterValue
+  // CHECK-DAG:      [[ConstM4:f\d+]]  FloatConstant -4
+  // CHECK-DAG:      [[Mul:f\d+]]      Mul [ [[Arg]] [[ConstM4]] ]
+  // CHECK-DAG:                        Return [ [[Mul]] ]
+
+  // CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+
+  public static float DivMP25(float arg) {
+    return arg / -0.25f;
+  }
+
+  // CHECK-START: double Main.DivMP25(double) instruction_simplifier (before)
+  // CHECK-DAG:      [[Arg:d\d+]]      ParameterValue
+  // CHECK-DAG:      [[ConstMP25:d\d+]]   DoubleConstant -0.25
+  // CHECK-DAG:      [[Div:d\d+]]      Div [ [[Arg]] [[ConstMP25]] ]
+  // CHECK-DAG:                        Return [ [[Div]] ]
+
+  // CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  // CHECK-DAG:      [[Arg:d\d+]]      ParameterValue
+  // CHECK-DAG:      [[ConstM4:d\d+]]  DoubleConstant -4
+  // CHECK-DAG:      [[Mul:d\d+]]      Mul [ [[Arg]] [[ConstM4]] ]
+  // CHECK-DAG:                        Return [ [[Mul]] ]
+
+  // CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  // CHECK-NOT:                        Div
+  public static double DivMP25(double arg) {
+    return arg / -0.25;
   }
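The CHECK blocks for Div2 and DivMP25 describe the same simplification: division by a constant whose reciprocal is exactly representable (2 and -0.25 are signed powers of two) is rewritten into multiplication by that reciprocal, which rounds to the identical result. A small stand-alone check of the equivalence, illustrative only and not part of the test:

    public class ReciprocalDivCheck {
      public static void main(String[] args) {
        float f = 123.456f;
        double d = 987.654;
        // 0.5f and -4.0 are the exact reciprocals of 2.0f and -0.25.
        System.out.println((f / 2.0f) == (f * 0.5f));   // true
        System.out.println((d / -0.25) == (d * -4.0));  // true
      }
    }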
 
   public static void main(String[] args) {
@@ -956,7 +1046,6 @@
     assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
     assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
     assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
-
     assertIntEquals(EqualTrueRhs(true), 5);
     assertIntEquals(EqualTrueLhs(true), 5);
     assertIntEquals(EqualFalseRhs(true), 3);
@@ -967,5 +1056,9 @@
     assertIntEquals(NotEqualFalseLhs(true), 5);
     assertBooleanEquals(NotNotBool(true), true);
     assertBooleanEquals(NotNotBool(false), false);
+    assertFloatEquals(Div2(100.0f), 50.0f);
+    assertDoubleEquals(Div2(150.0), 75.0);
+    assertFloatEquals(DivMP25(100.0f), -400.0f);
+    assertDoubleEquals(DivMP25(150.0), -600.0);
   }
 }
diff --git a/test/463-checker-boolean-simplifier/src/Main.java b/test/463-checker-boolean-simplifier/src/Main.java
index 3daf693..4346103 100644
--- a/test/463-checker-boolean-simplifier/src/Main.java
+++ b/test/463-checker-boolean-simplifier/src/Main.java
@@ -26,6 +26,12 @@
     }
   }
 
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /*
    * Elementary test negating a boolean. Verifies that blocks are merged and
    * empty branches removed.
@@ -155,6 +161,36 @@
     return (x <= y) == (y <= z);
   }
 
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (before)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const42:i\d+]]  IntConstant 42
+  // CHECK-DAG:     [[Const43:i\d+]]  IntConstant 43
+  // CHECK-DAG:     [[NotParam:z\d+]] BooleanNot [ [[Param]] ]
+  // CHECK-DAG:                       If [ [[NotParam]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const42]] [[Const43]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  // CHECK-DAG:     [[Param:z\d+]]    ParameterValue
+  // CHECK-DAG:     [[Const42:i\d+]]  IntConstant 42
+  // CHECK-DAG:     [[Const43:i\d+]]  IntConstant 43
+  // CHECK-DAG:                       If [ [[Param]] ]
+  // CHECK-DAG:     [[Phi:i\d+]]      Phi [ [[Const42]] [[Const43]] ]
+  // CHECK-DAG:                       Return [ [[Phi]] ]
+
+  // Note: The fact that branches are swapped is verified by running the test.
+
+  // CHECK-START: int Main.NegatedCondition(boolean) boolean_simplifier (after)
+  // CHECK-NOT:                       BooleanNot
+
+  public static int NegatedCondition(boolean x) {
+    if (x != false) {
+      return 42;
+    } else {
+      return 43;
+    }
+  }
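The swap of successors noted above is easiest to read at the source level: after boolean_simplifier the graph no longer contains a BooleanNot, the If consumes the parameter directly, and the branches are exchanged so that x == true still selects 42. A hypothetical rendering (the method name is ours, not part of the test):

    public static int negatedConditionSimplified(boolean x) {
      return x ? 42 : 43;
    }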
+
   public static void main(String[] args) {
     assertBoolEquals(false, BooleanNot(true));
     assertBoolEquals(true, BooleanNot(false));
@@ -171,5 +207,7 @@
     assertBoolEquals(true, ValuesOrdered(3, 3, 3));
     assertBoolEquals(true, ValuesOrdered(3, 3, 5));
     assertBoolEquals(false, ValuesOrdered(5, 5, 3));
+    assertIntEquals(42, NegatedCondition(true));
+    assertIntEquals(43, NegatedCondition(false));
   }
 }
diff --git a/test/482-checker-loop-back-edge-use/expected.txt b/test/482-checker-loop-back-edge-use/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/expected.txt
diff --git a/test/482-checker-loop-back-edge-use/info.txt b/test/482-checker-loop-back-edge-use/info.txt
new file mode 100644
index 0000000..f7fdeff
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/info.txt
@@ -0,0 +1,2 @@
+Tests the register allocator's optimization of adding synthesized uses
+at back edges.
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
new file mode 100644
index 0000000..74184e8
--- /dev/null
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+
+  // CHECK-START: void Main.loop1(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 22) }, uses: { 17 22 }
+  // CHECK:         Goto (liveness: 20)
+  public static void loop1(boolean incoming) {
+    while (incoming) {}
+  }
+
+  // CHECK-START: void Main.loop2(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 42) }, uses: { 33 38 42 }
+  // CHECK:         Goto (liveness: 36)
+  // CHECK:         Goto (liveness: 40)
+  public static void loop2(boolean incoming) {
+    while (true) {
+      System.out.println("foo");
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop3(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 60) }, uses: { 56 60 }
+  // CHECK:         Goto (liveness: 58)
+
+  // CHECK-START: void Main.loop3(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 54)
+  public static void loop3(boolean incoming) {
+    // 'incoming' only needs a use at the outer loop's back edge.
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+      System.out.println(incoming);
+    }
+  }
+
+  // CHECK-START: void Main.loop4(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 22) }, uses: { 22 }
+
+  // CHECK-START: void Main.loop4(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 20)
+  public static void loop4(boolean incoming) {
+    // 'incoming' has no loop use, so should not have back edge uses.
+    System.out.println(incoming);
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop5(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 50) }, uses: { 33 42 46 50 }
+  // CHECK:         Goto (liveness: 44)
+  // CHECK:         Goto (liveness: 48)
+  public static void loop5(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {
+        System.out.println(incoming);
+      }
+    }
+  }
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 46) }, uses: { 24 46 }
+  // CHECK:         Goto (liveness: 44)
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 22)
+  public static void loop6(boolean incoming) {
+    // 'incoming' must have a use only at the first loop's back edge.
+    while (true) {
+      System.out.println(incoming);
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop7(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 50) }, uses: { 32 41 46 50 }
+  // CHECK:         Goto (liveness: 44)
+  // CHECK:         Goto (liveness: 48)
+  public static void loop7(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      System.out.println(incoming);
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop8() liveness (after)
+  // CHECK:         StaticFieldGet (liveness: 12 ranges: { [12, 44) }, uses: { 35 40 44 }
+  // CHECK:         Goto (liveness: 38)
+  // CHECK:         Goto (liveness: 42)
+  public static void loop8() {
+    // 'incoming' must have a use at both back edges.
+    boolean incoming = field;
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop9() liveness (after)
+  // CHECK:         StaticFieldGet (liveness: 22 ranges: { [22, 36) }, uses: { 31 36 }
+  // CHECK:         Goto (liveness: 38)
+  public static void loop9() {
+    while (Runtime.getRuntime() != null) {
+      // 'incoming' must only have a use in the inner loop.
+      boolean incoming = field;
+      while (incoming) {}
+    }
+  }
+
+  public static void main(String[] args) {
+  }
+
+  static boolean field;
+}
diff --git a/test/483-dce-block/expected.txt b/test/483-dce-block/expected.txt
new file mode 100644
index 0000000..ef48625
--- /dev/null
+++ b/test/483-dce-block/expected.txt
@@ -0,0 +1 @@
+class Main
diff --git a/test/483-dce-block/info.txt b/test/483-dce-block/info.txt
new file mode 100644
index 0000000..3db88ab
--- /dev/null
+++ b/test/483-dce-block/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used to crash
+while compiling the `foo` method.
diff --git a/test/483-dce-block/src/Main.java b/test/483-dce-block/src/Main.java
new file mode 100644
index 0000000..2f66a74
--- /dev/null
+++ b/test/483-dce-block/src/Main.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void foo(Object o, int a) {
+    Object result = null;
+    if (o instanceof Main) {
+      // The compiler optimizes the type of `o` by introducing
+      // a `HBoundType` in this block.
+      while (a != 3) {
+        if (a == 2) {
+          a++;
+          result = o;
+          continue;
+        } else if (willInline()) {
+          // This block will be detected as dead after inlining.
+          result = new Object();
+          continue;
+        }
+        result = new Object();
+      }
+      // The compiler produces a phi at the back edge for `result`.
+      // Before dead block elimination, the phi has three inputs:
+      // result = (new Object(), new Object(), HBoundType)
+      //
+      // After dead block elimination, the phi has now two inputs:
+      // result = (new Object(), HBoundType)
+      //
+      // Our internal data structure for linking users and inputs expects
+      // the input index stored in that data structure to be the index
+      // in the inputs array. So before dead block elimination the index
+      // of the `HBoundType` input would be 2. Dead block elimination must
+      // update that index to 1.
+    }
+    System.out.println(result.getClass());
+  }
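The index bookkeeping described in that comment can be pictured with a much simplified model of an instruction's input list; the classes below are hypothetical and stand in for ART's real HInstruction/use-list machinery. Removing the dead input at index 1 must also rewrite the recorded index of the HBoundType input from 2 to 1, which is exactly what this regression test exercises.

    import java.util.ArrayList;
    import java.util.List;

    // Simplified sketch: keep each input's recorded "index in the inputs
    // array" consistent after an input is removed (e.g. by dead block
    // elimination).
    class PhiSketch {
      static class Input {
        final String producer;   // e.g. "new Object()" or "HBoundType"
        int indexInInputs;       // index recorded in the producer's use list
        Input(String producer, int indexInInputs) {
          this.producer = producer;
          this.indexInInputs = indexInInputs;
        }
      }

      final List<Input> inputs = new ArrayList<>();

      void removeInput(int index) {
        inputs.remove(index);
        // Every input after the removed one shifts down by one position,
        // so its recorded index must be updated as well.
        for (int i = index; i < inputs.size(); i++) {
          inputs.get(i).indexInInputs = i;
        }
      }
    }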
+
+  public static boolean willInline() {
+    return false;
+  }
+
+  public static void main(String[] args) {
+    foo(new Main(), 2);
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 3804fb7..c7e6877 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -385,9 +385,7 @@
 
 # Known broken tests for the optimizing compiler.
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 099-vmdebug # b/18098594
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 472-unreachable-if-regression # b/19988134
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 802-deoptimization # b/18547544
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \