Add fast string sharpening

String sharpening changes const-string loads into PC-relative loads instead
of always going through the dex cache. This saves code size and
probably improves performance slightly.

Before: 49602992 system@framework@boot.oat
After: 49385904 system@framework@boot.oat

Precursor to removing the dex_cache_strings_ field from ArtMethod.

Bug: 17643507

Change-Id: I1787f48774631eee0accafeea257aa8d0e91e8d6
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index ee1c467..458f690 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -509,6 +509,20 @@
     PushPointer(code_buffer_, &target_method_id, cu_->target64);
     data_lir = NEXT_LIR(data_lir);
   }
+  // Push the string literals.
+  data_lir = string_literal_list_;
+  while (data_lir != nullptr) {
+    uint32_t string_idx = data_lir->operands[0];
+    cu_->compiler_driver->AddStringPatch(cu_->dex_file,
+                                         cu_->class_def_idx,
+                                         cu_->method_idx,
+                                         string_idx,
+                                         code_buffer_.size());
+    const auto& target_string_id = cu_->dex_file->GetStringId(string_idx);
+    // unique value based on target to ensure code deduplication works
+    PushPointer(code_buffer_, &target_string_id, cu_->target64);
+    data_lir = NEXT_LIR(data_lir);
+  }
 }
 
 /* Write the switch tables to the output stream */
@@ -768,6 +782,7 @@
   offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset, ptr_size);
   offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset, ptr_size);
   offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset, ptr_size);
+  offset = AssignLiteralPointerOffsetCommon(string_literal_list_, offset, ptr_size);
   return offset;
 }
 
@@ -973,6 +988,7 @@
       literal_list_(NULL),
       method_literal_list_(NULL),
       class_literal_list_(NULL),
+      string_literal_list_(NULL),
       code_literal_list_(NULL),
       first_fixup_(NULL),
       cu_(cu),
@@ -1240,6 +1256,17 @@
   AppendLIR(load_pc_rel);
 }
 
+void Mir2Lir::LoadString(uint32_t string_idx, RegStorage target_reg) {
+  // Use the literal pool and a PC-relative load from a data word.
+  LIR* data_target = ScanLiteralPool(string_literal_list_, string_idx, 0);
+  if (data_target == nullptr) {
+    data_target = AddWordData(&string_literal_list_, string_idx);
+  }
+  // Loads a String pointer, which is a reference as it lives in the heap.
+  LIR* load_pc_rel = OpPcRelLoad(target_reg, data_target);
+  AppendLIR(load_pc_rel);
+}
+
 std::vector<uint8_t>* Mir2Lir::ReturnCallFrameInformation() {
   // Default case is to do nothing.
   return nullptr;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 7381f19..d5c9327 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -881,8 +881,8 @@
 
 void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) {
   /* NOTE: Most strings should be available at compile time */
-  int32_t offset_of_string = mirror::ObjectArray<mirror::String>::OffsetOfElement(string_idx).
-                                                                                      Int32Value();
+  const int32_t offset_of_string =
+      mirror::ObjectArray<mirror::String>::OffsetOfElement(string_idx).Int32Value();
   if (!cu_->compiler_driver->CanAssumeStringIsPresentInDexCache(
       *cu_->dex_file, string_idx) || SLOW_STRING_PATH) {
     // slow path, resolve string if not in dex cache
@@ -934,13 +934,32 @@
     GenBarrier();
     StoreValue(rl_dest, GetReturn(kRefReg));
   } else {
-    RegLocation rl_method = LoadCurrMethod();
-    RegStorage res_reg = AllocTempRef();
-    RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-    LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg,
-                kNotVolatile);
-    LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile);
-    StoreValue(rl_dest, rl_result);
+    // Try to see if we can embed a direct pointer.
+    bool use_direct_ptr = false;
+    size_t direct_ptr = 0;
+    bool embed_string = false;
+    // TODO: Implement for X86.
+    if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
+      embed_string = cu_->compiler_driver->CanEmbedStringInCode(*cu_->dex_file, string_idx,
+                                                                &use_direct_ptr, &direct_ptr);
+    }
+    if (embed_string) {
+      RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+      if (!use_direct_ptr) {
+        LoadString(string_idx, rl_result.reg);
+      } else {
+        LoadConstant(rl_result.reg, static_cast<int32_t>(direct_ptr));
+      }
+      StoreValue(rl_dest, rl_result);
+    } else {
+      RegLocation rl_method = LoadCurrMethod();
+      RegStorage res_reg = AllocTempRef();
+      RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg,
+                  kNotVolatile);
+      LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile);
+      StoreValue(rl_dest, rl_result);
+    }
   }
 }
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3dc111f..bfd7860 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1119,6 +1119,9 @@
      */
     virtual void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
 
+    // Load the string at string_idx into target_reg (default: PC-relative literal pool load).
+    virtual void LoadString(uint32_t string_idx, RegStorage target_reg);
+
     // Routines that work for the generic case, but may be overriden by target.
     /*
      * @brief Compare memory to immediate, and branch if condition true.
@@ -1677,6 +1680,7 @@
     LIR* literal_list_;                        // Constants.
     LIR* method_literal_list_;                 // Method literals requiring patching.
     LIR* class_literal_list_;                  // Class literals requiring patching.
+    LIR* string_literal_list_;                 // String literals requiring patching.
     LIR* code_literal_list_;                   // Code literals requiring patching.
     LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 62d70e9..9be7f8d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -424,22 +424,11 @@
   {
     MutexLock mu(self, compiled_classes_lock_);
     STLDeleteValues(&compiled_classes_);
-  }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
     STLDeleteValues(&compiled_methods_);
-  }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
     STLDeleteElements(&code_to_patch_);
-  }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
     STLDeleteElements(&methods_to_patch_);
-  }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
     STLDeleteElements(&classes_to_patch_);
+    STLDeleteElements(&strings_to_patch_);
   }
   CHECK_PTHREAD_CALL(pthread_key_delete, (tls_key_), "delete tls key");
   compiler_->UnInit();
@@ -941,16 +930,16 @@
 bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                                         bool* is_type_initialized, bool* use_direct_type_ptr,
                                         uintptr_t* direct_type_ptr, bool* out_is_finalizable) {
+  if (GetCompilerOptions().GetCompilePic()) {
+    // Do not allow a direct class pointer to be used when compiling position-independent code.
+    return false;
+  }
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     return false;
   }
-  if (GetCompilerOptions().GetCompilePic()) {
-    // Do not allow a direct class pointer to be used when compiling for position-independent
-    return false;
-  }
   *out_is_finalizable = resolved_class->IsFinalizable();
   const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
   const bool support_boot_image_fixup = GetSupportBootImageFixup();
@@ -989,6 +978,51 @@
   }
 }
 
+bool CompilerDriver::CanEmbedStringInCode(const DexFile& dex_file, uint32_t string_idx,
+                                          bool* use_direct_type_ptr, uintptr_t* direct_type_ptr) {
+  if (GetCompilerOptions().GetCompilePic()) {
+    // Do not allow a direct string pointer to be used when compiling position-independent code.
+    return false;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::String* resolved_string = dex_cache->GetResolvedString(string_idx);
+  if (resolved_string == nullptr) {
+    return false;
+  }
+  const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
+  const bool support_boot_image_fixup = GetSupportBootImageFixup();
+  if (compiling_boot) {
+    // boot -> boot string pointers.
+    // True if the string is in the image at boot compiling time.
+    const bool is_image_string = IsImage();
+    // True if pc relative load works.
+    if (is_image_string && support_boot_image_fixup) {
+      *use_direct_type_ptr = false;
+      *direct_type_ptr = 0;
+      return true;
+    }
+    return false;
+  } else {
+    // True if the string is in the image at app compiling time (currently forced to false).
+    const bool obj_in_image =
+        false && Runtime::Current()->GetHeap()->FindSpaceFromObject(resolved_string, false)->IsImageSpace();
+    if (obj_in_image && support_boot_image_fixup) {
+      // boot -> app string pointers.
+      // TODO This is somewhat hacky. We should refactor all of this invoke codepath.
+      *use_direct_type_ptr = !GetCompilerOptions().GetIncludePatchInformation();
+      *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_string);
+      return true;
+    }
+
+    // app -> app string pointers.
+    // Give up because app does not have an image and the string
+    // isn't created at compile time.  TODO: implement this
+    // if/when each app gets an image.
+    return false;
+  }
+}
+
 void CompilerDriver::ProcessedInstanceField(bool resolved) {
   if (!resolved) {
     stats_->UnresolvedInstanceField();
@@ -1372,6 +1406,18 @@
                                                        target_type_idx,
                                                        literal_offset));
 }
+void CompilerDriver::AddStringPatch(const DexFile* dex_file,
+                                    uint16_t referrer_class_def_idx,
+                                    uint32_t referrer_method_idx,
+                                    uint32_t string_idx,
+                                    size_t literal_offset) {
+  MutexLock mu(Thread::Current(), compiled_methods_lock_);
+  strings_to_patch_.push_back(new StringPatchInformation(dex_file,
+                                                         referrer_class_def_idx,
+                                                         referrer_method_idx,
+                                                         string_idx,
+                                                         literal_offset));
+}
 
 class ParallelCompilationManager {
  public:
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index c487e42..ba98bb5 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -212,6 +212,9 @@
                           bool* is_type_initialized, bool* use_direct_type_ptr,
                           uintptr_t* direct_type_ptr, bool* out_is_finalizable);
 
+  bool CanEmbedStringInCode(const DexFile& dex_file, uint32_t string_idx,
+                            bool* use_direct_type_ptr, uintptr_t* direct_type_ptr);
+
   // Get the DexCache for the
   mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -357,6 +360,12 @@
                      uint32_t target_method_idx,
                      size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
+  void AddStringPatch(const DexFile* dex_file,
+                      uint16_t referrer_class_def_idx,
+                      uint32_t referrer_method_idx,
+                      uint32_t string_idx,
+                      size_t literal_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
 
   bool GetSupportBootImageFixup() const {
     return support_boot_image_fixup_;
@@ -576,6 +585,35 @@
     DISALLOW_COPY_AND_ASSIGN(TypePatchInformation);
   };
 
+  class StringPatchInformation : public PatchInformation {
+   public:
+    uint32_t GetStringIdx() const {
+      return string_idx_;
+    }
+
+    bool IsType() const {
+      return false;
+    }
+    const TypePatchInformation* AsType() const {
+      return nullptr;
+    }
+
+   private:
+    StringPatchInformation(const DexFile* dex_file,
+                           uint16_t referrer_class_def_idx,
+                           uint32_t referrer_method_idx,
+                           uint32_t string_idx,
+                           size_t literal_offset)
+        : PatchInformation(dex_file, referrer_class_def_idx, referrer_method_idx, literal_offset),
+          string_idx_(string_idx) {
+    }
+
+    const uint32_t string_idx_;
+
+    friend class CompilerDriver;
+    DISALLOW_COPY_AND_ASSIGN(StringPatchInformation);
+  };
+
   const std::vector<const CallPatchInformation*>& GetCodeToPatch() const {
     return code_to_patch_;
   }
@@ -585,6 +623,9 @@
   const std::vector<const TypePatchInformation*>& GetClassesToPatch() const {
     return classes_to_patch_;
   }
+  const std::vector<const StringPatchInformation*>& GetStringsToPatch() const {
+    return strings_to_patch_;
+  }
 
   // Checks if class specified by type_idx is one of the image_classes_
   bool IsImageClass(const char* descriptor) const;
@@ -710,6 +751,7 @@
   std::vector<const CallPatchInformation*> code_to_patch_;
   std::vector<const CallPatchInformation*> methods_to_patch_;
   std::vector<const TypePatchInformation*> classes_to_patch_;
+  std::vector<const StringPatchInformation*> strings_to_patch_;
 
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc
index e8ccd67..1577166 100644
--- a/compiler/elf_patcher.cc
+++ b/compiler/elf_patcher.cc
@@ -96,6 +96,16 @@
   return method;
 }
 
+mirror::String* ElfPatcher::GetTargetString(const CompilerDriver::StringPatchInformation* patch) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
+  mirror::String* string = class_linker->ResolveString(patch->GetDexFile(), patch->GetStringIdx(),
+                                                       dex_cache);
+  CHECK(string != nullptr) << patch->GetDexFile().GetLocation() << " " << patch->GetStringIdx();
+  return string;
+}
+
 mirror::Class* ElfPatcher::GetTargetType(const CompilerDriver::TypePatchInformation* patch) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   StackHandleScope<2> hs(Thread::Current());
@@ -183,7 +193,8 @@
   if (write_patches_) {
     patches_.reserve(compiler_driver_->GetCodeToPatch().size() +
                      compiler_driver_->GetMethodsToPatch().size() +
-                     compiler_driver_->GetClassesToPatch().size());
+                     compiler_driver_->GetClassesToPatch().size() +
+                     compiler_driver_->GetStringsToPatch().size());
   }
   Thread* self = Thread::Current();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -248,13 +259,15 @@
     SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target)));
   }
 
-  const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch =
-      compiler_driver_->GetClassesToPatch();
-  for (size_t i = 0; i < classes_to_patch.size(); i++) {
-    const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i];
+  for (const CompilerDriver::TypePatchInformation* patch : compiler_driver_->GetClassesToPatch()) {
     mirror::Class* target = GetTargetType(patch);
     SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target)));
   }
+  for (const CompilerDriver::StringPatchInformation* patch :
+      compiler_driver_->GetStringsToPatch()) {
+    mirror::String* target = GetTargetString(patch);
+    SetPatchLocation(patch, PointerToLowMemUInt32(get_image_address_(cb_data_, target)));
+  }
 
   self->EndAssertNoThreadSuspension(old_cause);
 
diff --git a/compiler/elf_patcher.h b/compiler/elf_patcher.h
index 0a9f0a01..8bd5c45 100644
--- a/compiler/elf_patcher.h
+++ b/compiler/elf_patcher.h
@@ -83,6 +83,8 @@
 
   mirror::Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::String* GetTargetString(const CompilerDriver::StringPatchInformation* patch)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AddPatch(uintptr_t off);
 
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 703e63f..f0eaec2 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -891,10 +891,11 @@
 // Add patch information to this section. Each patch is a Elf32_Word that
 // identifies an offset from the start of the text section
 void ElfWriterQuick::ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug) {
-  size_t size =
+  const size_t size =
       compiler_driver_->GetCodeToPatch().size() +
       compiler_driver_->GetMethodsToPatch().size() +
-      compiler_driver_->GetClassesToPatch().size();
+      compiler_driver_->GetClassesToPatch().size() +
+      compiler_driver_->GetStringsToPatch().size();
   if (size == 0) {
     if (debug) {
       LOG(INFO) << "No patches to record";