Merge "Add some more instruction support to optimizing compiler."
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 09f34b3..f916e1e 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -134,7 +134,7 @@
 # Clang on the target: only enabled for ARM64. Target builds use GCC by default.
 ART_TARGET_CLANG :=
 ART_TARGET_CLANG_arm :=
-ART_TARGET_CLANG_arm64 := true
+ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 7441dac..f098a34 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -86,7 +86,11 @@
 }
 
 size_t CompiledCode::CodeDelta() const {
-  switch (instruction_set_) {
+  return CodeDelta(instruction_set_);
+}
+
+size_t CompiledCode::CodeDelta(InstructionSet instruction_set) {
+  switch (instruction_set) {
     case kArm:
     case kArm64:
     case kMips:
@@ -98,7 +102,7 @@
       return 1;
     }
     default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set_;
+      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return 0;
   }
 }
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 23cd250..b8cd851 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -67,6 +67,7 @@
   // returns the difference between the code address and a usable PC.
   // mainly to cope with kThumb2 where the lower bit must be set.
   size_t CodeDelta() const;
+  static size_t CodeDelta(InstructionSet instruction_set);
 
   // Returns a pointer suitable for invoking the code at the argument
   // code_pointer address.  Mainly to cope with kThumb2 where the
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index b0216b5..0845656 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -783,10 +783,11 @@
                                      uint16_t class_def_idx, uint32_t method_idx,
                                      jobject class_loader, const DexFile& dex_file,
                                      void* llvm_compilation_unit) {
-  VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
+  std::string method_name = PrettyMethod(method_idx, dex_file);
+  VLOG(compiler) << "Compiling " << method_name << "...";
   if (code_item->insns_size_in_code_units_ >= 0x10000) {
     LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_
-              << " in " << PrettyMethod(method_idx, dex_file);
+              << " in " << method_name;
     return NULL;
   }
 
@@ -818,8 +819,7 @@
   cu.compiler_flip_match = false;
   bool use_match = !cu.compiler_method_match.empty();
   bool match = use_match && (cu.compiler_flip_match ^
-      (PrettyMethod(method_idx, dex_file).find(cu.compiler_method_match) !=
-       std::string::npos));
+      (method_name.find(cu.compiler_method_match) != std::string::npos));
   if (!use_match || match) {
     cu.disable_opt = kCompilerOptimizerDisableFlags;
     cu.enable_debug = kCompilerDebugFlags;
@@ -830,7 +830,7 @@
   if (gVerboseMethods.size() != 0) {
     cu.verbose = false;
     for (size_t i = 0; i < gVerboseMethods.size(); ++i) {
-      if (PrettyMethod(method_idx, dex_file).find(gVerboseMethods[i])
+      if (method_name.find(gVerboseMethods[i])
           != std::string::npos) {
         cu.verbose = true;
         break;
@@ -887,22 +887,13 @@
     cu.mir_graph->EnableOpcodeCounting();
   }
 
-  // Check early if we should skip this compilation if the profiler is enabled.
-  if (cu.compiler_driver->ProfilePresent()) {
-    std::string methodname = PrettyMethod(method_idx, dex_file);
-    if (cu.mir_graph->SkipCompilationByName(methodname)) {
-      return nullptr;
-    }
-  }
-
   /* Build the raw MIR graph */
   cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
                               class_loader, dex_file);
 
   // TODO(Arm64): Remove this when we are able to compile everything.
   if (!CanCompileMethod(method_idx, dex_file, cu)) {
-    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : "
-                    << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : " << method_name;
     return nullptr;
   }
 
@@ -910,7 +901,7 @@
   std::string skip_message;
   if (cu.mir_graph->SkipCompilation(&skip_message)) {
     VLOG(compiler) << cu.instruction_set << ": Skipping method : "
-                   << PrettyMethod(method_idx, dex_file) << "  Reason = " << skip_message;
+                   << method_name << "  Reason = " << skip_message;
     return nullptr;
   }
 
@@ -918,6 +909,13 @@
   PassDriverMEOpts pass_driver(&cu);
   pass_driver.Launch();
 
+  /* For non-leaf methods, check if we should skip compilation when the profiler is enabled. */
+  if (cu.compiler_driver->ProfilePresent()
+      && !cu.mir_graph->MethodIsLeaf()
+      && cu.mir_graph->SkipCompilationByName(method_name)) {
+    return nullptr;
+  }
+
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
@@ -933,7 +931,7 @@
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
     if (cu.arena_stack.PeakBytesAllocated() > 256 * 1024) {
       MemStats stack_stats(cu.arena_stack.GetPeakStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
+      LOG(INFO) << method_name << " " << Dumpable<MemStats>(stack_stats);
     }
   }
   cu.arena_stack.Reset();
@@ -941,8 +939,7 @@
   CompiledMethod* result = NULL;
 
   if (cu.mir_graph->PuntToInterpreter()) {
-    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: "
-                   << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: " << method_name;
     return nullptr;
   }
 
@@ -953,21 +950,21 @@
   cu.NewTimingSplit("Cleanup");
 
   if (result) {
-    VLOG(compiler) << cu.instruction_set << ": Compiled " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Compiled " << method_name;
   } else {
-    VLOG(compiler) << cu.instruction_set << ": Deferred " << PrettyMethod(method_idx, dex_file);
+    VLOG(compiler) << cu.instruction_set << ": Deferred " << method_name;
   }
 
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
     if (cu.arena.BytesAllocated() > (1 * 1024 *1024)) {
       MemStats mem_stats(cu.arena.GetMemStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+      LOG(INFO) << method_name << " " << Dumpable<MemStats>(mem_stats);
     }
   }
 
   if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
     LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks()
-              << " " << PrettyMethod(method_idx, dex_file);
+              << " " << method_name;
   }
 
   cu.EndTiming();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 3e326f0..4a331fc 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2054,7 +2054,9 @@
   ProfileFile::ProfileData data;
   if (!profile_file_.GetProfileData(&data, method_name)) {
     // Not in profile, no information can be determined.
-    VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    if (kIsDebugBuild) {
+      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
+    }
     return true;
   }
 
@@ -2063,13 +2065,16 @@
   // falls inside a bucket.
   bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
                  <= compiler_options_->GetTopKProfileThreshold();
-  if (compile) {
-    LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-        << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-        << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-  } else {
-    VLOG(compiler) << "not compiling method " << method_name << " because it's not part of leading "
-        << compiler_options_->GetTopKProfileThreshold() << "% samples)";
+  if (kIsDebugBuild) {
+    if (compile) {
+      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
+          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
+          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
+    } else {
+      VLOG(compiler) << "not compiling method " << method_name
+          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
+          << "% samples)";
+    }
   }
   return !compile;
 }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c6b9161..4590880 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -800,6 +800,7 @@
 size_t OatWriter::InitOatCode(size_t offset) {
   // calculate the offsets within OatHeader to executable code
   size_t old_offset = offset;
+  size_t adjusted_offset = offset;
   // required to be on a new page boundary
   offset = RoundUp(offset, kPageSize);
   oat_header_->SetExecutableOffset(offset);
@@ -809,7 +810,8 @@
 
     #define DO_TRAMPOLINE(field, fn_name) \
       offset = CompiledCode::AlignCode(offset, instruction_set); \
-      oat_header_->Set ## fn_name ## Offset(offset); \
+      adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
+      oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
       field.reset(compiler_driver_->Create ## fn_name()); \
       offset += field->size();
 
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index ac84d6a..d5225c1 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -30,11 +30,7 @@
 namespace arm {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset<4> offset) {
-  // NOTE: the assembler used here is ARM, not Thumb.  This is because the address
-  // returned by this function is a pointer and for thumb we would have to set the
-  // bottom bit.  It doesn't matter since the instructions generated are the same
-  // size anyway.
-  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm)));
+  std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2)));
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.