Merge "ART: Fix UTF test and monitor pool old chunks"
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index eed032f..bddb054 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -173,6 +173,19 @@
       info_.WriteExprLoc(DW_AT_frame_base, expr);
       WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
 
+      // Decode dex register locations for all stack maps.
+      // It might be expensive, so do it just once and reuse the result.
+      std::vector<DexRegisterMap> dex_reg_maps;
+      if (mi->IsFromOptimizingCompiler()) {
+        const CodeInfo code_info(mi->compiled_method->GetVmapTable().data());
+        StackMapEncoding encoding = code_info.ExtractEncoding();
+        for (size_t s = 0; s < code_info.GetNumberOfStackMaps(); ++s) {
+          const StackMap& stack_map = code_info.GetStackMapAt(s, encoding);
+          dex_reg_maps.push_back(code_info.GetDexRegisterMapOf(
+              stack_map, encoding, dex_code->registers_size_));
+        }
+      }
+
       // Write parameters. DecodeDebugLocalInfo returns them as well, but it does not
       // guarantee order or uniqueness so it is safer to iterate over them manually.
      // DecodeDebugLocalInfo might also be unavailable if there is no debug info.
@@ -187,7 +200,7 @@
           // Write the stack location of the parameter.
           const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
           const bool is64bitValue = false;
-          WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc);
+          WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.low_pc);
         }
         arg_reg++;
         info_.EndTag();
@@ -206,7 +219,7 @@
           if (dex_code != nullptr) {
             // Write the stack location of the parameter.
             const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
-            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc);
+            WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.low_pc);
           }
           arg_reg += is64bitValue ? 2 : 1;
           info_.EndTag();
@@ -229,8 +242,13 @@
             WriteName(var.name_);
             WriteLazyType(var.descriptor_);
             bool is64bitValue = var.descriptor_[0] == 'D' || var.descriptor_[0] == 'J';
-            WriteRegLocation(mi, var.reg_, is64bitValue, compilation_unit.low_pc,
-                             var.start_address_, var.end_address_);
+            WriteRegLocation(mi,
+                             dex_reg_maps,
+                             var.reg_,
+                             is64bitValue,
+                             compilation_unit.low_pc,
+                             var.start_address_,
+                             var.end_address_);
             info_.EndTag();
           }
         }
@@ -424,12 +442,14 @@
   // The dex register might be valid only at some points and it might
   // move between machine registers and stack.
   void WriteRegLocation(const MethodDebugInfo* method_info,
+                        const std::vector<DexRegisterMap>& dex_register_maps,
                         uint16_t vreg,
                         bool is64bitValue,
                         uint32_t compilation_unit_low_pc,
                         uint32_t dex_pc_low = 0,
                         uint32_t dex_pc_high = 0xFFFFFFFF) {
     WriteDebugLocEntry(method_info,
+                       dex_register_maps,
                        vreg,
                        is64bitValue,
                        compilation_unit_low_pc,
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index 32f624a..c321b4b 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -85,29 +85,32 @@
 // The result will cover all ranges where the variable is in scope.
 // PCs corresponding to stack maps with dex register maps are accurate;
 // all other PCs are best-effort only.
-std::vector<VariableLocation> GetVariableLocations(const MethodDebugInfo* method_info,
-                                                   uint16_t vreg,
-                                                   bool is64bitValue,
-                                                   uint32_t dex_pc_low,
-                                                   uint32_t dex_pc_high) {
+std::vector<VariableLocation> GetVariableLocations(
+    const MethodDebugInfo* method_info,
+    const std::vector<DexRegisterMap>& dex_register_maps,
+    uint16_t vreg,
+    bool is64bitValue,
+    uint32_t dex_pc_low,
+    uint32_t dex_pc_high) {
   std::vector<VariableLocation> variable_locations;
 
   // Get stack maps sorted by pc (they might not be sorted internally).
   const CodeInfo code_info(method_info->compiled_method->GetVmapTable().data());
   const StackMapEncoding encoding = code_info.ExtractEncoding();
-  std::map<uint32_t, StackMap> stack_maps;
+  std::map<uint32_t, uint32_t> stack_maps;  // low_pc -> stack_map_index.
   for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
     StackMap stack_map = code_info.GetStackMapAt(s, encoding);
     DCHECK(stack_map.IsValid());
     const uint32_t low_pc = method_info->low_pc + stack_map.GetNativePcOffset(encoding);
     DCHECK_LE(low_pc, method_info->high_pc);
-    stack_maps.emplace(low_pc, stack_map);
+    stack_maps.emplace(low_pc, s);
   }
 
   // Create entries for the requested register based on stack map data.
   for (auto it = stack_maps.begin(); it != stack_maps.end(); it++) {
-    const StackMap& stack_map = it->second;
     const uint32_t low_pc = it->first;
+    const uint32_t stack_map_index = it->second;
+    const StackMap& stack_map = code_info.GetStackMapAt(stack_map_index, encoding);
     auto next_it = it;
     next_it++;
     const uint32_t high_pc = next_it != stack_maps.end() ? next_it->first
@@ -126,9 +129,9 @@
     // Find the location of the dex register.
     DexRegisterLocation reg_lo = DexRegisterLocation::None();
     DexRegisterLocation reg_hi = DexRegisterLocation::None();
-    if (stack_map.HasDexRegisterMap(encoding)) {
-      DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(
-          stack_map, encoding, method_info->code_item->registers_size_);
+    DCHECK_LT(stack_map_index, dex_register_maps.size());
+    DexRegisterMap dex_register_map = dex_register_maps[stack_map_index];
+    if (dex_register_map.IsValid()) {
       reg_lo = dex_register_map.GetDexRegisterLocation(
           vreg, method_info->code_item->registers_size_, code_info, encoding);
       if (is64bitValue) {
@@ -159,6 +162,7 @@
 // The dex register might be valid only at some points and it might
 // move between machine registers and stack.
 static void WriteDebugLocEntry(const MethodDebugInfo* method_info,
+                               const std::vector<DexRegisterMap>& dex_register_maps,
                                uint16_t vreg,
                                bool is64bitValue,
                                uint32_t compilation_unit_low_pc,
@@ -175,6 +179,7 @@
 
   std::vector<VariableLocation> variable_locations = GetVariableLocations(
       method_info,
+      dex_register_maps,
       vreg,
       is64bitValue,
       dex_pc_low,
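The two hunks above implement a simple memoization: every stack map's dex register map is decoded once per method, and the pc-sorted lookup now stores stack map indices instead of copies of `StackMap`, so one index addresses both the stack map and the pre-decoded cache. A minimal sketch of the lookup pattern with plain standard-library types (`Entry`, its field, and `SortByPc` are hypothetical stand-ins for ART's `StackMap` machinery):

```cpp
#include <cstdint>
#include <map>
#include <vector>

struct Entry { uint32_t native_pc_offset; };  // stand-in for art::StackMap

// Sort stack maps by native pc while storing only their indices, so the same
// index can be used for both the stack map and the register-map cache.
std::map<uint32_t, uint32_t> SortByPc(const std::vector<Entry>& stack_maps,
                                      uint32_t method_low_pc) {
  std::map<uint32_t, uint32_t> by_pc;  // low_pc -> stack_map_index
  for (uint32_t s = 0; s < stack_maps.size(); ++s) {
    by_pc.emplace(method_low_pc + stack_maps[s].native_pc_offset, s);
  }
  return by_pc;
}
```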
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 79a6d38..6ff1e2e 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -85,7 +85,7 @@
   exit(EXIT_FAILURE);
 }
 
-JitCompiler::JitCompiler() : total_time_(0) {
+JitCompiler::JitCompiler() {
   compiler_options_.reset(new CompilerOptions(
       CompilerOptions::kDefaultCompilerFilter,
       CompilerOptions::kDefaultHugeMethodThreshold,
@@ -195,7 +195,6 @@
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
   DCHECK(!method->IsProxyMethod());
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
-  const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
@@ -236,7 +235,6 @@
     runtime->GetJitArenaPool()->TrimMaps();
   }
 
-  total_time_ += NanoTime() - start_time;
   runtime->GetJit()->AddTimingLogger(logger);
   return success;
 }
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 5294d0e..bc134fe 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -18,13 +18,10 @@
 #define ART_COMPILER_JIT_JIT_COMPILER_H_
 
 #include "base/mutex.h"
-#include "compiler_callbacks.h"
 #include "compiled_method.h"
-#include "dex/verification_results.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
-#include "oat_file.h"
 
 namespace art {
 
@@ -37,23 +34,19 @@
  public:
   static JitCompiler* Create();
   virtual ~JitCompiler();
+
+  // Compilation entrypoint. Returns whether the compilation succeeded.
   bool CompileMethod(Thread* self, ArtMethod* method, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  CompilerCallbacks* GetCompilerCallbacks() const;
-  size_t GetTotalCompileTime() const {
-    return total_time_;
-  }
+
   CompilerOptions* GetCompilerOptions() const {
     return compiler_options_.get();
   }
 
  private:
-  uint64_t total_time_;
   std::unique_ptr<CompilerOptions> compiler_options_;
   std::unique_ptr<CumulativeLogger> cumulative_logger_;
-  std::unique_ptr<VerificationResults> verification_results_;
   std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
-  std::unique_ptr<CompilerCallbacks> callbacks_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
   std::unique_ptr<File> perf_file_;
@@ -62,8 +55,7 @@
 
   // This is in the compiler since the runtime doesn't have access to the compiled method
   // structures.
-  bool AddToCodeCache(ArtMethod* method,
-                      const CompiledMethod* compiled_method)
+  bool AddToCodeCache(ArtMethod* method, const CompiledMethod* compiled_method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(JitCompiler);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7a4a6ef..934b427 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -46,6 +46,7 @@
 using helpers::SRegisterFrom;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
+using helpers::InputRegisterAt;
 
 namespace {
 
@@ -367,6 +368,40 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
+static void GenBitCount(HInvoke* instr, bool is_long, vixl::MacroAssembler* masm) {
+  DCHECK(instr->GetType() == Primitive::kPrimInt);
+  DCHECK((is_long && instr->InputAt(0)->GetType() == Primitive::kPrimLong) ||
+         (!is_long && instr->InputAt(0)->GetType() == Primitive::kPrimInt));
+
+  Location out = instr->GetLocations()->Out();
+  UseScratchRegisterScope temps(masm);
+
+  Register src = InputRegisterAt(instr, 0);
+  Register dst = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
+  FPRegister fpr = is_long ? temps.AcquireD() : temps.AcquireS();
+
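+  // Move the operand into a SIMD register, count the set bits in each byte
+  // lane with Cnt, sum the byte counts with the horizontal add Addv, and
+  // move the result back to a core register.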
+  __ Fmov(fpr, src);
+  __ Cnt(fpr.V8B(), fpr.V8B());
+  __ Addv(fpr.B(), fpr.V8B());
+  __ Fmov(dst, fpr);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, /* is_long */ true, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, /* is_long */ false, GetVIXLAssembler());
+}
+
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
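The GenBitCount sequence above leans on NEON: Cnt produces a per-byte popcount and Addv sums the eight byte counts. For illustration only, here is a scalar C++ sketch of the same computation (`PopCountViaBytes` is a hypothetical name; the emitted code uses the SIMD instructions directly):

```cpp
#include <cstdint>

// What Fmov/Cnt/Addv compute: per-byte popcounts, then a horizontal add.
int PopCountViaBytes(uint64_t v) {
  int total = 0;
  for (int i = 0; i < 8; ++i) {
    uint8_t byte = static_cast<uint8_t>(v >> (8 * i));
    int count = 0;
    while (byte != 0) {
      byte &= byte - 1;  // clear the lowest set bit, once per set bit
      ++count;
    }
    total += count;  // Addv: sum the eight byte counts
  }
  return total;  // equals __builtin_popcountll(v)
}
```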
@@ -1672,9 +1707,243 @@
   __ Bind(&done);
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM64, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM64, LongBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyChar)
+// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
+// implementation there for longer copy lengths.
+static constexpr int32_t kSystemArrayCopyThreshold = 32;
+
+static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
+                                               uint32_t at,
+                                               HInstruction* input) {
+  HIntConstant* const_input = input->AsIntConstant();
+  if (const_input != nullptr && !vixl::Assembler::IsImmAddSub(const_input->GetValue())) {
+    locations->SetInAt(at, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(at, Location::RegisterOrConstant(input));
+  }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  // If we already know of failures that will force a bail-out to the runtime,
+  // just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
+    // We will have to fail anyway.
+    return;
+  }
+
+  // The length must be >= 0 and not so long that we would (currently) prefer libcore's
+  // native implementation.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0 || len > kSystemArrayCopyThreshold) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
+  locations->SetInAt(2, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
+  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void CheckSystemArrayCopyPosition(vixl::MacroAssembler* masm,
+                                         const Location& pos,
+                                         const Register& input,
+                                         const Location& length,
+                                         SlowPathCodeARM64* slow_path,
+                                         const Register& input_len,
+                                         const Register& temp,
+                                         bool length_is_input_length = false) {
+  const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ Ldr(temp, MemOperand(input, length_offset));
+        __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
+        __ B(slow_path->GetEntryLabel(), lt);
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ Ldr(input_len, MemOperand(input, length_offset));
+      __ Subs(temp, input_len, pos_const);
+      __ B(slow_path->GetEntryLabel(), lt);
+
+      // Check that (length(input) - pos) >= length.
+      __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt));
+      __ B(slow_path->GetEntryLabel(), lt);
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    Register pos_reg = WRegisterFrom(pos);
+    __ Tbnz(pos_reg, pos_reg.size() - 1, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input) && (length(input) - pos) >= length.
+    __ Ldr(temp, MemOperand(input, length_offset));
+    __ Subs(temp, temp, pos_reg);
+    // If length(input) >= pos, Ccmp compares (length(input) - pos) with length;
+    // otherwise it sets the N flag so the following lt branch bails to the slow path.
+    __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge);
+    __ B(slow_path->GetEntryLabel(), lt);
+  }
+}
+
+// Compute base source address, base destination address, and end source address
+// for System.arraycopy* intrinsics.
+static void GenSystemArrayCopyAddresses(vixl::MacroAssembler* masm,
+                                        Primitive::Type type,
+                                        const Register& src,
+                                        const Location& src_pos,
+                                        const Register& dst,
+                                        const Location& dst_pos,
+                                        const Location& copy_length,
+                                        const Register& src_base,
+                                        const Register& dst_base,
+                                        const Register& src_end) {
+  DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
+         << "Unexpected element type: "
+         << type;
+  const int32_t char_size = Primitive::ComponentSize(type);
+  const int32_t char_size_shift = Primitive::ComponentSizeShift(type);
+
+  uint32_t offset = mirror::Array::DataOffset(char_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(src_base, src, char_size * constant + offset);
+  } else {
+    __ Add(src_base, src, offset);
+    __ Add(src_base,
+           src_base,
+           Operand(XRegisterFrom(src_pos), LSL, char_size_shift));
+  }
+
+  if (dst_pos.IsConstant()) {
+    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(dst_base, dst, char_size * constant + offset);
+  } else {
+    __ Add(dst_base, dst, offset);
+    __ Add(dst_base,
+           dst_base,
+           Operand(XRegisterFrom(dst_pos), LSL, char_size_shift));
+  }
+
+  if (copy_length.IsConstant()) {
+    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(src_end, src_base, char_size * constant);
+  } else {
+    __ Add(src_end,
+           src_base,
+           Operand(XRegisterFrom(copy_length), LSL, char_size_shift));
+  }
+}
+
+void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+  Register src = XRegisterFrom(locations->InAt(0));
+  Location src_pos = locations->InAt(1);
+  Register dst = XRegisterFrom(locations->InAt(2));
+  Location dst_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // If source and destination are the same array, take the slow path: overlapping
+  // regions may need to be copied in reverse, and we cannot always detect that here.
+  __ Cmp(src, dst);
+  __ B(slow_path->GetEntryLabel(), eq);
+
+  // Bail out if the source is null.
+  __ Cbz(src, slow_path->GetEntryLabel());
+
+  // Bail out if the destination is null.
+  __ Cbz(dst, slow_path->GetEntryLabel());
+
+  if (!length.IsConstant()) {
+    // If the length is negative, bail out.
+    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+    // If the length > 32 then (currently) prefer libcore's native implementation.
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
+    __ B(slow_path->GetEntryLabel(), gt);
+  } else {
+    // We have already checked in the LocationsBuilder for the constant case.
+    DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
+    DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
+  }
+
+  Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
+  Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
+  Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
+
+  CheckSystemArrayCopyPosition(masm,
+                               src_pos,
+                               src,
+                               length,
+                               slow_path,
+                               src_curr_addr,
+                               dst_curr_addr,
+                               false);
+
+  CheckSystemArrayCopyPosition(masm,
+                               dst_pos,
+                               dst,
+                               length,
+                               slow_path,
+                               src_curr_addr,
+                               dst_curr_addr,
+                               false);
+
+  src_curr_addr = src_curr_addr.X();
+  dst_curr_addr = dst_curr_addr.X();
+  src_stop_addr = src_stop_addr.X();
+
+  GenSystemArrayCopyAddresses(masm,
+                              Primitive::kPrimChar,
+                              src,
+                              src_pos,
+                              dst,
+                              dst_pos,
+                              length,
+                              src_curr_addr,
+                              dst_curr_addr,
+                              src_stop_addr);
+
+  // Iterate over the arrays and do a raw copy of the chars.
+  const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  UseScratchRegisterScope temps(masm);
+  Register tmp = temps.AcquireW();
+  vixl::Label loop, done;
+  __ Bind(&loop);
+  __ Cmp(src_curr_addr, src_stop_addr);
+  __ B(&done, eq);
+  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, vixl::PostIndex));
+  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, vixl::PostIndex));
+  __ B(&loop);
+  __ Bind(&done);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
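Between them, CheckSystemArrayCopyPosition and GenSystemArrayCopyAddresses encode the usual arraycopy fast-path contract: each position must be non-negative and leave enough elements behind it, and the copy loop then walks raw element addresses. A minimal C++ sketch of that contract under simplified assumptions (`CharArray`, `PositionOk`, and `CopyChars` are illustrative names, and the struct only stands in for the mirror::Array layout):

```cpp
#include <cstdint>

// Simplified model of one array operand: a length plus a data pointer.
struct CharArray {
  int32_t length;
  uint16_t* data;
};

// True when the fast path may copy; mirrors the emitted position checks.
bool PositionOk(const CharArray& a, int32_t pos, int32_t copy_length) {
  return pos >= 0 && pos <= a.length && a.length - pos >= copy_length;
}

// The post-indexed Ldrh/Strh loop, in scalar form.
void CopyChars(const CharArray& src, int32_t src_pos,
               CharArray* dst, int32_t dst_pos, int32_t copy_length) {
  const uint16_t* s = src.data + src_pos;   // src_base
  const uint16_t* s_end = s + copy_length;  // src_end
  uint16_t* d = dst->data + dst_pos;        // dst_base
  while (s != s_end) {
    *d++ = *s++;
  }
}
```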
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5a35dd5..710df0a 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -490,7 +490,6 @@
 static void GenNumberOfTrailingZeroes(LocationSummary* locations,
                                       bool is64bit,
                                       bool isR6,
-                                      bool isR2OrNewer,
                                       MipsAssembler* assembler) {
   Register out = locations->Out().AsRegister<Register>();
   Register in_lo;
@@ -503,7 +502,7 @@
 
     // If in_lo is zero then count the number of trailing zeroes in in_hi;
     // otherwise count the number of trailing zeroes in in_lo.
-    // AT = in_lo ? in_lo : in_hi;
+    // out = in_lo ? in_lo : in_hi;
     if (isR6) {
       __ Seleqz(out, in_hi, in_lo);
       __ Selnez(TMP, in_lo, in_lo);
@@ -522,50 +521,26 @@
     in_lo = in;
   }
 
-  // We don't have an instruction to count the number of trailing zeroes.
-  // Start by flipping the bits end-for-end so we can count the number of
-  // leading zeroes instead.
-  if (isR2OrNewer) {
+  if (isR6) {
+    // We don't have an instruction to count the number of trailing zeroes.
+    // Start by flipping the bits end-for-end so we can count the number of
+    // leading zeroes instead.
     __ Rotr(out, in, 16);
     __ Wsbh(out, out);
-  } else {
-    // MIPS32r1
-    // __ Rotr(out, in, 16);
-    __ Sll(TMP, in, 16);
-    __ Srl(out, in, 16);
-    __ Or(out, out, TMP);
-    // __ Wsbh(out, out);
-    __ LoadConst32(AT, 0x00FF00FF);
-    __ And(TMP, out, AT);
-    __ Sll(TMP, TMP, 8);
-    __ Srl(out, out, 8);
-    __ And(out, out, AT);
-    __ Or(out, out, TMP);
-  }
-
-  if (isR6) {
     __ Bitswap(out, out);
     __ ClzR6(out, out);
   } else {
-    __ LoadConst32(AT, 0x0F0F0F0F);
-    __ And(TMP, out, AT);
-    __ Sll(TMP, TMP, 4);
-    __ Srl(out, out, 4);
-    __ And(out, out, AT);
-    __ Or(out, TMP, out);
-    __ LoadConst32(AT, 0x33333333);
-    __ And(TMP, out, AT);
-    __ Sll(TMP, TMP, 2);
-    __ Srl(out, out, 2);
-    __ And(out, out, AT);
-    __ Or(out, TMP, out);
-    __ LoadConst32(AT, 0x55555555);
-    __ And(TMP, out, AT);
-    __ Sll(TMP, TMP, 1);
-    __ Srl(out, out, 1);
-    __ And(out, out, AT);
-    __ Or(out, TMP, out);
+    // Convert trailing zeroes to trailing ones, and bits to their left
+    // to zeroes.
+    __ Addiu(TMP, in, -1);
+    __ Xor(out, TMP, in);
+    __ And(out, out, TMP);
+    // Count number of leading zeroes.
     __ ClzR2(out, out);
+    // Subtract number of leading zeroes from 32 to get number of trailing ones.
+    // Remember that the trailing ones were formerly trailing zeroes.
+    __ LoadConst32(TMP, 32);
+    __ Subu(out, TMP, out);
   }
 
   if (is64bit) {
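The R2 fallback above derives the trailing-zero count from CLZ alone: `in - 1` turns the k trailing zeroes into ones, the Xor/And keep exactly those k low bits, and `32 - clz` recovers k. A runnable sketch of the same trick (`Ctz32` is a hypothetical name; `__builtin_clz` is the GCC/Clang builtin):

```cpp
#include <cstdint>

// Trailing-zero count built from CLZ only, mirroring the MIPS32r2 path.
// Returns 32 for an input of zero, like Integer.numberOfTrailingZeros.
int Ctz32(uint32_t in) {
  uint32_t tmp = in - 1;             // k trailing ones where in had k trailing zeroes
  uint32_t mask = (tmp ^ in) & tmp;  // keep exactly those k low bits
  int clz = (mask == 0) ? 32 : __builtin_clz(mask);  // clz(0) is undefined
  return 32 - clz;
}
```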
@@ -587,11 +562,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(),
-                            /* is64bit */ false,
-                            IsR6(),
-                            IsR2OrNewer(),
-                            GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler());
 }
 
 // int java.lang.Long.numberOfTrailingZeros(long i)
@@ -600,11 +571,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(),
-                            /* is64bit */ true,
-                            IsR6(),
-                            IsR2OrNewer(),
-                            GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler());
 }
 
 // int java.lang.Integer.reverse(int)
@@ -643,6 +610,142 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
+static void GenBitCount(LocationSummary* locations,
+                        Primitive::Type type,
+                        bool isR6,
+                        MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  Register out = locations->Out().AsRegister<Register>();
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  //
+  // A generalization of the best bit counting method to integers of
+  // bit-widths up to 128 (parameterized by type T) is this:
+  //
+  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
+  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
+  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
+  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+  //
+  // For comparison, for 32-bit quantities this algorithm can be executed
+  // using 20 MIPS instructions (each LoadConst32() call for the constants
+  // used here expands to two machine instructions). An unrolled loop-based
+  // algorithm would require 25 instructions.
+  //
+  // For 64-bit quantities, this algorithm gets executed twice, (once
+  // for in_lo, and again for in_hi), but saves a few instructions
+  // because the mask values only have to be loaded once.  Using this
+  // algorithm the count for a 64-bit operand can be performed in 33
+  // instructions compared to a loop-based algorithm which required 47
+  // instructions.
+
+  if (type == Primitive::kPrimInt) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    __ Srl(TMP, in, 1);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, TMP, AT);
+    __ Subu(TMP, in, TMP);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(out, TMP, AT);
+    __ Srl(TMP, TMP, 2);
+    __ And(TMP, TMP, AT);
+    __ Addu(TMP, out, TMP);
+    __ Srl(out, TMP, 4);
+    __ Addu(out, out, TMP);
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(out, out, AT);
+    __ LoadConst32(TMP, 0x01010101);
+    if (isR6) {
+      __ MulR6(out, out, TMP);
+    } else {
+      __ MulR2(out, out, TMP);
+    }
+    __ Srl(out, out, 24);
+  } else if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
+    Register out_hi = locations->GetTemp(1).AsRegister<Register>();
+    Register tmp_lo = TMP;
+    Register out_lo = out;
+
+    __ Srl(tmp_lo, in_lo, 1);
+    __ Srl(tmp_hi, in_hi, 1);
+
+    __ LoadConst32(AT, 0x55555555);
+
+    __ And(tmp_lo, tmp_lo, AT);
+    __ Subu(tmp_lo, in_lo, tmp_lo);
+
+    __ And(tmp_hi, tmp_hi, AT);
+    __ Subu(tmp_hi, in_hi, tmp_hi);
+
+    __ LoadConst32(AT, 0x33333333);
+
+    __ And(out_lo, tmp_lo, AT);
+    __ Srl(tmp_lo, tmp_lo, 2);
+    __ And(tmp_lo, tmp_lo, AT);
+    __ Addu(tmp_lo, out_lo, tmp_lo);
+    __ Srl(out_lo, tmp_lo, 4);
+    __ Addu(out_lo, out_lo, tmp_lo);
+
+    __ And(out_hi, tmp_hi, AT);
+    __ Srl(tmp_hi, tmp_hi, 2);
+    __ And(tmp_hi, tmp_hi, AT);
+    __ Addu(tmp_hi, out_hi, tmp_hi);
+    __ Srl(out_hi, tmp_hi, 4);
+    __ Addu(out_hi, out_hi, tmp_hi);
+
+    __ LoadConst32(AT, 0x0F0F0F0F);
+
+    __ And(out_lo, out_lo, AT);
+    __ And(out_hi, out_hi, AT);
+
+    __ LoadConst32(AT, 0x01010101);
+
+    if (isR6) {
+      __ MulR6(out_lo, out_lo, AT);
+
+      __ MulR6(out_hi, out_hi, AT);
+    } else {
+      __ MulR2(out_lo, out_lo, AT);
+
+      __ MulR2(out_hi, out_hi, AT);
+    }
+
+    __ Srl(out_lo, out_lo, 24);
+    __ Srl(out_hi, out_hi, 24);
+
+    __ Addu(out, out_hi, out_lo);
+  }
+}
+
+// int java.lang.Integer.bitCount(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler());
+}
+
+// int java.lang.Long.bitCount(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
   FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
   FRegister out = locations->Out().AsFpuRegister<FRegister>();
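The bithacks formula quoted in the GenBitCount comment above, specialized to 32 bits, corresponds line for line to the instruction sequence emitted for kPrimInt. A runnable C++ instance (`BitCount32` is a hypothetical name):

```cpp
#include <cstdint>

// SWAR popcount, the 32-bit instance of the formula from the comment above.
int BitCount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                  // 2-bit partial sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);  // 4-bit partial sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                  // per-byte sums
  return static_cast<int>((v * 0x01010101u) >> 24);  // add the four bytes
}
```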
@@ -1502,8 +1605,178 @@
   __ Bind(&end);
 }
 
-UNIMPLEMENTED_INTRINSIC(MIPS, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(MIPS, LongBitCount)
+static void GenIsInfinite(LocationSummary* locations,
+                          const Primitive::Type type,
+                          const bool isR6,
+                          MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
+
+  if (isR6) {
+    if (type == Primitive::kPrimDouble) {
+        __ ClassD(FTMP, in);
+    } else {
+        __ ClassS(FTMP, in);
+    }
+    __ Mfc1(out, FTMP);
+    __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+    __ Sltu(out, ZERO, out);
+  } else {
+    // If any of the exponent bits is zero, then the number can't be infinite.
+    if (type == Primitive::kPrimDouble) {
+      __ MoveFromFpuHigh(TMP, in);
+      __ LoadConst32(AT, 0x7FF00000);
+    } else {
+      __ Mfc1(TMP, in);
+      __ LoadConst32(AT, 0x7F800000);
+    }
+    __ Xor(TMP, TMP, AT);
+
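+    // For an infinity the exponent bits are all ones, so the Xor clears them.
+    // The Sll below then shifts out the sign bit, leaving TMP equal to zero
+    // for both +inf and -inf.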
+    __ Sll(TMP, TMP, 1);
+
+    if (type == Primitive::kPrimDouble) {
+      __ Mfc1(AT, in);
+      __ Or(TMP, TMP, AT);
+    }
+    // If any of the significand bits are one, then the number is not infinite.
+    __ Sltiu(out, TMP, 1);
+  }
+}
+
+// boolean java.lang.Float.isInfinite(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), Primitive::kPrimFloat, IsR6(), GetAssembler());
+}
+
+// boolean java.lang.Double.isInfinite(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), Primitive::kPrimDouble, IsR6(), GetAssembler());
+}
+
+static void GenHighestOneBit(LocationSummary* locations,
+                             const Primitive::Type type,
+                             bool isR6,
+                             MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    if (isR6) {
+      __ ClzR6(TMP, in_hi);
+    } else {
+      __ ClzR2(TMP, in_hi);
+    }
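+    // 0x80000000 >> clz(in_hi) isolates the highest set bit of in_hi; the And
+    // handles in_hi == 0, where the shift amount of 32 is taken modulo 32.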
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(out_hi, AT, TMP);
+    __ And(out_hi, out_hi, in_hi);
+    if (isR6) {
+      __ ClzR6(TMP, in_lo);
+    } else {
+      __ ClzR2(TMP, in_lo);
+    }
+    __ Srlv(out_lo, AT, TMP);
+    __ And(out_lo, out_lo, in_lo);
+    if (isR6) {
+      __ Seleqz(out_lo, out_lo, out_hi);
+    } else {
+      __ Movn(out_lo, ZERO, out_hi);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR6) {
+      __ ClzR6(TMP, in);
+    } else {
+      __ ClzR2(TMP, in);
+    }
+    __ LoadConst32(AT, 0x80000000);
+    __ Srlv(AT, AT, TMP);  // Srlv only uses the lower 5 bits of the shift amount ([0;31]).
+    __ And(out, AT, in);   // So the And is needed to yield 0 when in == 0 (clz == 32).
+  }
+}
+
+// int java.lang.Integer.highestOneBit(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler());
+}
+
+// long java.lang.Long.highestOneBit(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
+}
+
+static void GenLowestOneBit(LocationSummary* locations,
+                            const Primitive::Type type,
+                            bool isR6,
+                            MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
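+    // in & -in isolates the lowest set bit: negation flips every bit above
+    // the lowest set bit, so the And keeps exactly that bit (and 0 stays 0).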
+    __ Subu(TMP, ZERO, in_lo);
+    __ And(out_lo, TMP, in_lo);
+    __ Subu(TMP, ZERO, in_hi);
+    __ And(out_hi, TMP, in_hi);
+    if (isR6) {
+      __ Seleqz(out_hi, out_hi, out_lo);
+    } else {
+      __ Movn(out_hi, ZERO, out_lo);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Subu(TMP, ZERO, in);
+    __ And(out, TMP, in);
+  }
+}
+
+// int java.lang.Integer.lowestOneBit(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler());
+}
+
+// long java.lang.Long.lowestOneBit(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler());
+}
+
+// Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
@@ -1559,16 +1832,10 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTanh)
 
-UNIMPLEMENTED_INTRINSIC(MIPS, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(MIPS, DoubleIsInfinite)
-
-UNIMPLEMENTED_INTRINSIC(MIPS, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(MIPS, LongLowestOneBit)
-
 UNREACHABLE_INTRINSICS(MIPS)
 
+#undef UNIMPLEMENTED_INTRINSIC
+
 #undef __
 
 }  // namespace mips
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index d9a2f30..f25d748 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -698,6 +698,12 @@
       Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
     }
 
+    if (IsBootImage()) {
+      // We need the boot image to always be debuggable.
+      // TODO: Remove this once we better deal with full frame deoptimization.
+      compiler_options_->debuggable_ = true;
+    }
+
     if (oat_filenames_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index ebe89bb..12d6d8f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -463,12 +463,6 @@
       interface_method->VisitRoots(visitor, pointer_size);
     }
     visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
-    if (!IsNative()) {
-      ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
-      if (profiling_info != nullptr) {
-        profiling_info->VisitRoots(visitor);
-      }
-    }
   }
 }
 
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_double.S b/runtime/interpreter/mterp/arm/op_cmpg_double.S
index 4b05c44..602a4b1 100644
--- a/runtime/interpreter/mterp/arm/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/arm/op_cmpg_double.S
@@ -23,7 +23,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     fldd    d0, [r2]                    @ d0<- vBB
     fldd    d1, [r3]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_cmpg_float.S b/runtime/interpreter/mterp/arm/op_cmpg_float.S
index d5d2df2..965091f 100644
--- a/runtime/interpreter/mterp/arm/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/arm/op_cmpg_float.S
@@ -23,7 +23,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     flds    s0, [r2]                    @ s0<- vBB
     flds    s1, [r3]                    @ s1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    vcmpe.f32 s0, s1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_double.S b/runtime/interpreter/mterp/arm/op_cmpl_double.S
index 6ee53b3..8a5e509 100644
--- a/runtime/interpreter/mterp/arm/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/arm/op_cmpl_double.S
@@ -23,7 +23,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     fldd    d0, [r2]                    @ d0<- vBB
     fldd    d1, [r3]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_cmpl_float.S b/runtime/interpreter/mterp/arm/op_cmpl_float.S
index 64535b6..9df0c2c 100644
--- a/runtime/interpreter/mterp/arm/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/arm/op_cmpl_float.S
@@ -23,7 +23,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     flds    s0, [r2]                    @ s0<- vBB
     flds    s1, [r3]                    @ s1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    vcmpe.f32  s0, s1                   @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_double_to_float.S b/runtime/interpreter/mterp/arm/op_double_to_float.S
index e327000..98fdfbc 100644
--- a/runtime/interpreter/mterp/arm/op_double_to_float.S
+++ b/runtime/interpreter/mterp/arm/op_double_to_float.S
@@ -1 +1 @@
-%include "arm/funopNarrower.S" {"instr":"fcvtsd  s0, d0"}
+%include "arm/funopNarrower.S" {"instr":"vcvt.f32.f64  s0, d0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_double.S b/runtime/interpreter/mterp/arm/op_float_to_double.S
index fb1892b..b1e12bd 100644
--- a/runtime/interpreter/mterp/arm/op_float_to_double.S
+++ b/runtime/interpreter/mterp/arm/op_float_to_double.S
@@ -1 +1 @@
-%include "arm/funopWider.S" {"instr":"fcvtds  d0, s0"}
+%include "arm/funopWider.S" {"instr":"vcvt.f64.f32  d0, s0"}
diff --git a/runtime/interpreter/mterp/arm/op_float_to_long.S b/runtime/interpreter/mterp/arm/op_float_to_long.S
index 24416d3..5c8680f 100644
--- a/runtime/interpreter/mterp/arm/op_float_to_long.S
+++ b/runtime/interpreter/mterp/arm/op_float_to_long.S
@@ -17,7 +17,7 @@
     cmp     r0, #0                      @ nonzero == yes
     mvnne   r0, #0                      @ return maxlong (7fffffff)
     mvnne   r1, #0x80000000
-    ldmnefd sp!, {r4, pc}
+    popne   {r4, pc}
 
     mov     r0, r4                      @ recover arg
     mov     r1, #0xdf000000             @ (float)minlong
@@ -25,14 +25,14 @@
     cmp     r0, #0                      @ nonzero == yes
     movne   r0, #0                      @ return minlong (80000000)
     movne   r1, #0x80000000
-    ldmnefd sp!, {r4, pc}
+    popne   {r4, pc}
 
     mov     r0, r4                      @ recover arg
     mov     r1, r4
     bl      __aeabi_fcmpeq              @ is arg == self?
     cmp     r0, #0                      @ zero == no
     moveq   r1, #0                      @ return zero for NaN
-    ldmeqfd sp!, {r4, pc}
+    popeq   {r4, pc}
 
     mov     r0, r4                      @ recover arg
     bl      __aeabi_f2lz                @ convert float to long
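For reference, the clamping contract this handler implements is Java's float-to-long conversion: saturate at maxlong/minlong and map NaN to zero, exactly as the assembly comments above describe. A C++ sketch (`FloatToLong` is a hypothetical name):

```cpp
#include <cmath>
#include <cstdint>

// Java f2l semantics: NaN -> 0, out-of-range values saturate.
int64_t FloatToLong(float f) {
  constexpr float kTwo63 = 9223372036854775808.0f;  // (float)2^63, exact
  if (std::isnan(f)) return 0;
  if (f >= kTwo63) return INT64_MAX;   // maxlong, 0x7fffffffffffffff
  if (f <= -kTwo63) return INT64_MIN;  // minlong, 0x8000000000000000
  return static_cast<int64_t>(f);      // in range, cast is well defined
}
```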
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 2b74d4c..b26a63a 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -1353,7 +1353,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     flds    s0, [r2]                    @ s0<- vBB
     flds    s1, [r3]                    @ s1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    vcmpe.f32  s0, s1                   @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1392,7 +1392,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     flds    s0, [r2]                    @ s0<- vBB
     flds    s1, [r3]                    @ s1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    vcmpe.f32 s0, s1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1431,7 +1431,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     fldd    d0, [r2]                    @ d0<- vBB
     fldd    d1, [r3]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1470,7 +1470,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     fldd    d0, [r2]                    @ d0<- vBB
     fldd    d1, [r3]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -3994,7 +3994,7 @@
     flds    s0, [r3]                    @ s0<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     and     r9, r9, #15                 @ r9<- A
-    fcvtds  d0, s0                              @ d0<- op
+    vcvt.f64.f32  d0, s0                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -4075,7 +4075,7 @@
     fldd    d0, [r3]                    @ d0<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     and     r9, r9, #15                 @ r9<- A
-    fcvtsd  s0, d0                              @ s0<- op
+    vcvt.f32.f64  s0, d0                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     fsts    s0, [r9]                    @ vA<- s0
@@ -7673,7 +7673,7 @@
     cmp     r0, #0                      @ nonzero == yes
     mvnne   r0, #0                      @ return maxlong (7fffffff)
     mvnne   r1, #0x80000000
-    ldmnefd sp!, {r4, pc}
+    popne   {r4, pc}
 
     mov     r0, r4                      @ recover arg
     mov     r1, #0xdf000000             @ (float)minlong
@@ -7681,14 +7681,14 @@
     cmp     r0, #0                      @ nonzero == yes
     movne   r0, #0                      @ return minlong (80000000)
     movne   r1, #0x80000000
-    ldmnefd sp!, {r4, pc}
+    popne   {r4, pc}
 
     mov     r0, r4                      @ recover arg
     mov     r1, r4
     bl      __aeabi_fcmpeq              @ is arg == self?
     cmp     r0, #0                      @ zero == no
     moveq   r1, #0                      @ return zero for NaN
-    ldmeqfd sp!, {r4, pc}
+    popeq   {r4, pc}
 
     mov     r0, r4                      @ recover arg
     bl      __aeabi_f2lz                @ convert float to long
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 91b006a..a3b99e3 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -59,12 +59,7 @@
 }
 
 void Jit::DumpInfo(std::ostream& os) {
-  os << "JIT code cache size=" << PrettySize(code_cache_->CodeCacheSize()) << "\n"
-     << "JIT data cache size=" << PrettySize(code_cache_->DataCacheSize()) << "\n"
-     << "JIT current capacity=" << PrettySize(code_cache_->GetCurrentCapacity()) << "\n"
-     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n"
-     << "JIT total number of compilations=" << code_cache_->NumberOfCompilations() << "\n"
-     << "JIT total number of osr compilations=" << code_cache_->NumberOfOsrCompilations() << "\n";
+  code_cache_->Dump(os);
   cumulative_timings_.Dump(os);
 }
 
@@ -97,7 +92,7 @@
     return nullptr;
   }
   jit->save_profiling_info_ = options->GetSaveProfilingInfo();
-  LOG(INFO) << "JIT created with initial_capacity="
+  VLOG(jit) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
       << ", compile_threshold=" << options->GetCompileThreshold()
@@ -174,7 +169,6 @@
   // of that proxy method, as the compiler does not expect a proxy method.
   ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
   if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr)) {
-    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache";
     return false;
   }
   bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index e0380bd..e5be2a4 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -24,6 +24,7 @@
 #include "debugger_interface.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
+#include "jit/jit.h"
 #include "jit/profiling_info.h"
 #include "linear_alloc.h"
 #include "mem_map.h"
@@ -129,7 +130,9 @@
       used_memory_for_data_(0),
       used_memory_for_code_(0),
       number_of_compilations_(0),
-      number_of_osr_compilations_(0) {
+      number_of_osr_compilations_(0),
+      number_of_deoptimizations_(0),
+      number_of_collections_(0) {
 
   DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
@@ -363,16 +366,6 @@
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
-size_t JitCodeCache::NumberOfCompilations() {
-  MutexLock mu(Thread::Current(), lock_);
-  return number_of_compilations_;
-}
-
-size_t JitCodeCache::NumberOfOsrCompilations() {
-  MutexLock mu(Thread::Current(), lock_);
-  return number_of_osr_compilations_;
-}
-
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
   return CodeCacheSizeLocked();
@@ -391,11 +384,6 @@
   return used_memory_for_data_;
 }
 
-size_t JitCodeCache::NumberOfCompiledCode() {
-  MutexLock mu(Thread::Current(), lock_);
-  return method_code_map_.size();
-}
-
 void JitCodeCache::ClearData(Thread* self, void* data) {
   MutexLock mu(self, lock_);
   FreeData(reinterpret_cast<uint8_t*>(data));
@@ -577,6 +565,7 @@
     if (WaitForPotentialCollectionToComplete(self)) {
       return;
     } else {
+      number_of_collections_++;
       live_bitmap_.reset(CodeCacheBitmap::Create(
           "code-cache-bitmap",
           reinterpret_cast<uintptr_t>(code_map_->Begin()),
@@ -585,80 +574,84 @@
     }
   }
 
-  bool do_full_collection = false;
+  TimingLogger logger("JIT code cache timing logger", true, VLOG_IS_ON(jit));
   {
-    MutexLock mu(self, lock_);
-    do_full_collection = ShouldDoFullCollection();
-  }
+    TimingLogger::ScopedTiming st("Code cache collection", &logger);
 
-  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-    LOG(INFO) << "Do "
-              << (do_full_collection ? "full" : "partial")
-              << " code cache collection, code="
-              << PrettySize(CodeCacheSize())
-              << ", data=" << PrettySize(DataCacheSize());
-  }
-
-  DoCollection(self, /* collect_profiling_info */ do_full_collection);
-
-  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
-    LOG(INFO) << "After code cache collection, code="
-              << PrettySize(CodeCacheSize())
-              << ", data=" << PrettySize(DataCacheSize());
-  }
-
-  {
-    MutexLock mu(self, lock_);
-
-    // Increase the code cache only when we do partial collections.
-    // TODO: base this strategy on how full the code cache is?
-    if (do_full_collection) {
-      last_collection_increased_code_cache_ = false;
-    } else {
-      last_collection_increased_code_cache_ = true;
-      IncreaseCodeCacheCapacity();
+    bool do_full_collection = false;
+    {
+      MutexLock mu(self, lock_);
+      do_full_collection = ShouldDoFullCollection();
     }
 
-    bool next_collection_will_be_full = ShouldDoFullCollection();
+    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+      LOG(INFO) << "Do "
+                << (do_full_collection ? "full" : "partial")
+                << " code cache collection, code="
+                << PrettySize(CodeCacheSize())
+                << ", data=" << PrettySize(DataCacheSize());
+    }
 
-    // Start polling the liveness of compiled code to prepare for the next full collection.
-    // We avoid doing this if exit stubs are installed to not mess with the instrumentation.
-    // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
-    if (!Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled() &&
-        next_collection_will_be_full) {
-      // Save the entry point of methods we have compiled, and update the entry
-      // point of those methods to the interpreter. If the method is invoked, the
-      // interpreter will update its entry point to the compiled code and call it.
-      for (ProfilingInfo* info : profiling_infos_) {
-        const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
-        if (ContainsPc(entry_point)) {
-          info->SetSavedEntryPoint(entry_point);
-          info->GetMethod()->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
-        }
+    DoCollection(self, /* collect_profiling_info */ do_full_collection);
+
+    if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+      LOG(INFO) << "After code cache collection, code="
+                << PrettySize(CodeCacheSize())
+                << ", data=" << PrettySize(DataCacheSize());
+    }
+
+    {
+      MutexLock mu(self, lock_);
+
+      // Increase the code cache only when we do partial collections.
+      // TODO: base this strategy on how full the code cache is?
+      if (do_full_collection) {
+        last_collection_increased_code_cache_ = false;
+      } else {
+        last_collection_increased_code_cache_ = true;
+        IncreaseCodeCacheCapacity();
       }
 
-      DCHECK(CheckLiveCompiledCodeHasProfilingInfo());
+      bool next_collection_will_be_full = ShouldDoFullCollection();
+
+      // Start polling the liveness of compiled code to prepare for the next full collection.
+      // We avoid doing this if exit stubs are installed to not mess with the instrumentation.
+      // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
+      if (!Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled() &&
+          next_collection_will_be_full) {
+        // Save the entry point of methods we have compiled, and update the entry
+        // point of those methods to the interpreter. If the method is invoked, the
+        // interpreter will update its entry point to the compiled code and call it.
+        for (ProfilingInfo* info : profiling_infos_) {
+          const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
+          if (ContainsPc(entry_point)) {
+            info->SetSavedEntryPoint(entry_point);
+            info->GetMethod()->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+          }
+        }
+
+        DCHECK(CheckLiveCompiledCodeHasProfilingInfo());
+      }
+      live_bitmap_.reset(nullptr);
+      NotifyCollectionDone(self);
     }
-    live_bitmap_.reset(nullptr);
-    NotifyCollectionDone(self);
   }
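+  // Merge this collection's timings into the JIT's cumulative timing logger.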
+  Runtime::Current()->GetJit()->AddTimingLogger(logger);
 }
 
-void JitCodeCache::RemoveUnusedAndUnmarkedCode(Thread* self) {
+void JitCodeCache::RemoveUnmarkedCode(Thread* self) {
   MutexLock mu(self, lock_);
   ScopedCodeCacheWrite scc(code_map_.get());
-  // Iterate over all compiled code and remove entries that are not marked and not
-  // the entrypoint of their corresponding ArtMethod.
+  // Iterate over all compiled code and remove entries that are not marked.
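+  // An entry is marked if it is the entry point of its method, or was found on a
+  // thread's call stack during the checkpoint.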
   for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
     const void* code_ptr = it->first;
     ArtMethod* method = it->second;
     uintptr_t allocation = FromCodeToAllocation(code_ptr);
-    const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-    const void* entrypoint = method->GetEntryPointFromQuickCompiledCode();
-    if ((entrypoint == method_header->GetEntryPoint()) || GetLiveBitmap()->Test(allocation)) {
+    if (GetLiveBitmap()->Test(allocation)) {
       ++it;
     } else {
-      if (entrypoint == GetQuickToInterpreterBridge()) {
+      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      if (method_header->GetEntryPoint() == GetQuickToInterpreterBridge()) {
         method->ClearCounter();
       }
       FreeCode(code_ptr, method);
@@ -687,6 +680,19 @@
       }
     }
 
+    // Mark compiled code that is the entry point of an ArtMethod. Compiled code that
+    // is not an entry point is either:
+    // - OSR compiled code, which will be removed if not on a thread's call stack.
+    // - discarded compiled code, which will be removed if not on a thread's call stack.
+    for (const auto& it : method_code_map_) {
+      ArtMethod* method = it.second;
+      const void* code_ptr = it.first;
+      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
+        GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
+      }
+    }
+
     // Empty osr method map, as osr compiled code will be deleted (except the ones
     // on thread stacks).
     osr_code_map_.clear();
@@ -695,9 +701,10 @@
   // Run a checkpoint on all threads to mark the JIT compiled code they are running.
   MarkCompiledCodeOnThreadStacks(self);
 
-  // Remove compiled code that is not the entrypoint of their method and not in the call
-  // stack.
-  RemoveUnusedAndUnmarkedCode(self);
+  // At this point, mutator threads are still running, and entrypoints of methods can
+  // change. However, they cannot change to a code cache entry that is not marked,
+  // so we can safely remove the unmarked entries.
+  RemoveUnmarkedCode(self);
 
   if (collect_profiling_info) {
     MutexLock mu(self, lock_);
@@ -874,17 +881,27 @@
 
 bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) {
   if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    VLOG(jit) << PrettyMethod(method) << " is already compiled";
     return false;
   }
 
   MutexLock mu(self, lock_);
   if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) {
+    VLOG(jit) << PrettyMethod(method) << " is already osr compiled";
     return false;
   }
+
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
-  if (info == nullptr || info->IsMethodBeingCompiled()) {
+  if (info == nullptr) {
+    VLOG(jit) << PrettyMethod(method) << " needs a ProfilingInfo to be compiled";
     return false;
   }
+
+  if (info->IsMethodBeingCompiled()) {
+    VLOG(jit) << PrettyMethod(method) << " is already being compiled";
+    return false;
+  }
+
   info->SetIsMethodBeingCompiled(true);
   return true;
 }
@@ -924,6 +941,8 @@
       osr_code_map_.erase(it);
     }
   }
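+  // The deoptimization counter is guarded by lock_, so take the lock to update it.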
+  MutexLock mu(Thread::Current(), lock_);
+  number_of_deoptimizations_++;
 }
 
 uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
@@ -953,5 +972,18 @@
   mspace_free(data_mspace_, data);
 }
 
+void JitCodeCache::Dump(std::ostream& os) {
+  MutexLock mu(Thread::Current(), lock_);
+  os << "Current JIT code cache size: " << PrettySize(used_memory_for_code_) << "\n"
+     << "Current JIT data cache size: " << PrettySize(used_memory_for_data_) << "\n"
+     << "Current JIT capacity: " << PrettySize(current_capacity_) << "\n"
+     << "Current number of JIT code cache entries: " << method_code_map_.size() << "\n"
+     << "Total number of JIT compilations: " << number_of_compilations_ << "\n"
+     << "Total number of JIT compilations for on stack replacement: "
+        << number_of_osr_compilations_ << "\n"
+     << "Total number of deoptimizations: " << number_of_deoptimizations_ << "\n"
+     << "Total number of JIT code cache collections: " << number_of_collections_ << std::endl;
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index aa1b139..0bd4f7d 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -67,14 +67,6 @@
   // Number of bytes allocated in the data cache.
   size_t DataCacheSize() REQUIRES(!lock_);
 
-  // Number of compiled code in the code cache. Note that this is not the number
-  // of methods that got JIT compiled, as we might have collected some.
-  size_t NumberOfCompiledCode() REQUIRES(!lock_);
-
-  // Number of compilations done throughout the lifetime of the JIT.
-  size_t NumberOfCompilations() REQUIRES(!lock_);
-  size_t NumberOfOsrCompilations() REQUIRES(!lock_);
-
   bool NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
@@ -185,6 +177,8 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void Dump(std::ostream& os) REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -244,7 +238,7 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void RemoveUnusedAndUnmarkedCode(Thread* self)
+  void RemoveUnmarkedCode(Thread* self)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -256,6 +250,11 @@
       REQUIRES(lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void FreeCode(uint8_t* code) REQUIRES(lock_);
+  uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
+  void FreeData(uint8_t* data) REQUIRES(lock_);
+  uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
+
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
@@ -307,19 +306,21 @@
   // The size in bytes of used memory for the code portion of the code cache.
   size_t used_memory_for_code_ GUARDED_BY(lock_);
 
-  void FreeCode(uint8_t* code) REQUIRES(lock_);
-  uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_);
-  void FreeData(uint8_t* data) REQUIRES(lock_);
-  uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
-
   // Number of compilations done throughout the lifetime of the JIT.
   size_t number_of_compilations_ GUARDED_BY(lock_);
+
+  // Number of on-stack-replacement compilations done throughout the lifetime of the JIT.
   size_t number_of_osr_compilations_ GUARDED_BY(lock_);
 
+  // Number of deoptimizations done throughout the lifetime of the JIT.
+  size_t number_of_deoptimizations_ GUARDED_BY(lock_);
+
+  // Number of code cache collections done throughout the lifetime of the JIT.
+  size_t number_of_collections_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
-
 }  // namespace jit
 }  // namespace art
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 19584ed..103a8b7 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -29,6 +29,7 @@
 #include "dex_cache.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
+#include "jit/profiling_info.h"
 #include "iftable.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
@@ -939,6 +940,12 @@
   }
   for (ArtMethod& method : GetMethods(pointer_size)) {
     method.VisitRoots(visitor, pointer_size);
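+    // Visit the roots (such as the classes in inline caches) held by the method's
+    // JIT ProfilingInfo. Native methods are skipped: they reuse the ProfilingInfo
+    // storage slot for the JNI entrypoint.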
+    if (method.GetDeclaringClassUnchecked() != nullptr && !method.IsNative()) {
+      ProfilingInfo* profiling_info = method.GetProfilingInfo(pointer_size);
+      if (profiling_info != nullptr) {
+        profiling_info->VisitRoots(visitor);
+      }
+    }
   }
 }
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 99b2296..6b82641 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -389,9 +389,10 @@
   bool stop_alloc_counting = false;
   Runtime* const runtime = Runtime::Current();
   Trace* the_trace = nullptr;
+  Thread* const self = Thread::Current();
   pthread_t sampling_pthread = 0U;
   {
-    MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+    MutexLock mu(self, *Locks::trace_lock_);
     if (the_trace_ == nullptr) {
       LOG(ERROR) << "Trace stop requested, but no trace currently running";
     } else {
@@ -409,6 +410,9 @@
   }
 
   {
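+    // Prevent a GC from running while all threads are suspended below to update
+    // the instrumentation.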
+    gc::ScopedGCCriticalSection gcs(self,
+                                    gc::kGcCauseInstrumentation,
+                                    gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
     if (the_trace != nullptr) {
       stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
@@ -417,7 +421,7 @@
       }
 
       if (the_trace->trace_mode_ == TraceMode::kSampling) {
-        MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+        MutexLock mu(self, *Locks::thread_list_lock_);
         runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
       } else {
         runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
diff --git a/test/011-array-copy/src/Main.java b/test/011-array-copy/src/Main.java
index 96e1dbf..d9b61e7 100644
--- a/test/011-array-copy/src/Main.java
+++ b/test/011-array-copy/src/Main.java
@@ -69,6 +69,11 @@
             array[i] = (long) i;
         }
     }
+    static void initCharArray(char[] array) {
+        for (int i = 0; i < ARRAY_SIZE; i++) {
+            array[i] = (char) i;
+        }
+    }
 
     /*
      * Perform an array copy operation on primitive arrays with different
@@ -79,16 +84,19 @@
         short[] shortArray = new short[ARRAY_SIZE];
         int[] intArray = new int[ARRAY_SIZE];
         long[] longArray = new long[ARRAY_SIZE];
+        char[] charArray = new char[ARRAY_SIZE];
 
         initByteArray(byteArray);
         initShortArray(shortArray);
         initIntArray(intArray);
         initLongArray(longArray);
+        initCharArray(charArray);
 
         System.arraycopy(byteArray, srcPos, byteArray, dstPos, length);
         System.arraycopy(shortArray, srcPos, shortArray, dstPos, length);
         System.arraycopy(intArray, srcPos, intArray, dstPos, length);
         System.arraycopy(longArray, srcPos, longArray, dstPos, length);
+        System.arraycopy(charArray, srcPos, charArray, dstPos, length);
 
         for (int i = 0; i < ARRAY_SIZE; i++) {
             if (intArray[i] != byteArray[i]) {
@@ -103,6 +111,10 @@
                 System.out.println("mismatch int vs long at " + i + " : " +
                     Arrays.toString(longArray));
                 break;
+            } else if (intArray[i] != charArray[i]) {
+                System.out.println("mismatch int vs char at " + i + " : " +
+                    Arrays.toString(charArray));
+                break;
             }
         }
 
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 93a9005..9aaed9d 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -40,6 +40,10 @@
     test_Math_rint();
     test_Math_round_D();
     test_Math_round_F();
+    test_Math_isNaN_D();
+    test_Math_isNaN_F();
+    test_Math_isInfinite_D();
+    test_Math_isInfinite_F();
     test_Short_reverseBytes();
     test_Integer_reverseBytes();
     test_Long_reverseBytes();
@@ -836,6 +840,106 @@
     Assert.assertEquals(Math.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
 
+  public static void test_Math_isNaN_D() {
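+    // IEEE 754: an all-ones exponent with a non-zero fraction encodes a NaN; the most
+    // significant fraction bit distinguishes quiet (set) from signaling (clear).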
+    // Signaling NaN.
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0x7FF4000000000000l)));
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0xFFF4000000000000l)));
+    // Quiet NaN.
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0x7FF8000000000000l)));
+    Assert.assertTrue(Double.isNaN(Double.longBitsToDouble(0xFFF8000000000000l)));
+    // Distinct from +/- infinity.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x7FF0000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0xFFF0000000000000l)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x7FE0000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0xFFE0000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0010000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8010000000000000l)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0000000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8000000000000000l)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0008000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8008000000000000l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x0000000000000001l)));
+    Assert.assertFalse(Double.isNaN(Double.longBitsToDouble(0x8000000000000001l)));
+  }
+
+  public static void test_Math_isNaN_F() {
+    // Signaling NaN.
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0x7FA00000)));
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0xFFA00000)));
+    // Quiet NaN.
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0x7FC00000)));
+    Assert.assertTrue(Float.isNaN(Float.intBitsToFloat(0xFFC00000)));
+    // Distinct from +/- infinity.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x7F800000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0xFF800000)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x7F000000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0xFF000000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00800000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80800000)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00000000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80000000)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00400000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80400000)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x00000001)));
+    Assert.assertFalse(Float.isNaN(Float.intBitsToFloat(0x80000001)));
+  }
+
+  public static void test_Math_isInfinite_D() {
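+    // IEEE 754: an all-ones exponent with a zero fraction encodes +/- infinity.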
+    // Distinct from Signaling NaN.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x7FF4000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0xFFF4000000000000l)));
+    // Distinct from Quiet NaN.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x7FF8000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0xFFF8000000000000l)));
+    // +/- infinity.
+    Assert.assertTrue(Double.isInfinite(Double.longBitsToDouble(0x7FF0000000000000l)));
+    Assert.assertTrue(Double.isInfinite(Double.longBitsToDouble(0xFFF0000000000000l)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x7FE0000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0xFFE0000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0010000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8010000000000000l)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0000000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8000000000000000l)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0008000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8008000000000000l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x0000000000000001l)));
+    Assert.assertFalse(Double.isInfinite(Double.longBitsToDouble(0x8000000000000001l)));
+  }
+
+  public static void test_Math_isInfinite_F() {
+    // Distinct from Signaling NaN.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x7FA00000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0xFFA00000)));
+    // Distinct from Quiet NaN.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x7FC00000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0xFFC00000)));
+    // +/- infinity.
+    Assert.assertTrue(Float.isInfinite(Float.intBitsToFloat(0x7F800000)));
+    Assert.assertTrue(Float.isInfinite(Float.intBitsToFloat(0xFF800000)));
+    // Distinct from normal numbers.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x7F000000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0xFF000000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00800000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80800000)));
+    // Distinct from +/- zero.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00000000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80000000)));
+    // Distinct from subnormal numbers.
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00400000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80400000)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x00000001)));
+    Assert.assertFalse(Float.isInfinite(Float.intBitsToFloat(0x80000001)));
+  }
+
   public static void test_StrictMath_abs_I() {
     StrictMath.abs(-1);
     Assert.assertEquals(StrictMath.abs(0), 0);
diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt
index 6a5618e..8db7853 100644
--- a/test/137-cfi/expected.txt
+++ b/test/137-cfi/expected.txt
@@ -1 +1,2 @@
 JNI_OnLoad called
+JNI_OnLoad called
diff --git a/test/137-cfi/run b/test/137-cfi/run
index 8ec98c1..ebc729b 100755
--- a/test/137-cfi/run
+++ b/test/137-cfi/run
@@ -16,9 +16,10 @@
 
 # Test with full DWARF debugging information.
 # Check full signatures of methods.
-${RUN} "$@" -Xcompiler-option --generate-debug-info --args --full-signatures
+${RUN} "$@" -Xcompiler-option --generate-debug-info \
+  --args --full-signatures --args --test-local --args --test-remote
 
 # Test with minimal compressed debugging information.
 # Check only method names (parameters are omitted to save space).
-# Temporarily disable due to bug 27172087 (leak/race in libunwind).
-# ${RUN} "$@" -Xcompiler-option --generate-mini-debug-info
+# Check only remote unwinding since decompression is disabled in local unwinds (b/27391690).
+${RUN} "$@" -Xcompiler-option --generate-mini-debug-info --args --test-remote
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index d60a4eb..5cfe33d 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -21,43 +21,48 @@
 import java.util.Comparator;
 
 public class Main implements Comparator<Main> {
-  // Whether to test local unwinding. Libunwind uses linker info to find executables. As we do
-  // not dlopen at the moment, this doesn't work, so keep it off for now.
-  public final static boolean TEST_LOCAL_UNWINDING = true;
+  // Whether to test local unwinding.
+  private boolean testLocal;
 
-  // Unwinding another process, modelling debuggerd. This doesn't use the linker, so should work
-  // no matter whether we're using dlopen or not.
-  public final static boolean TEST_REMOTE_UNWINDING = true;
+  // Whether to test unwinding another process, modelling debuggerd.
+  private boolean testRemote;
 
+  // We fork ourselves to create the secondary process for remote unwinding.
   private boolean secondary;
 
-  private boolean full_signatures;
+  // Expect the symbols to contain full method signatures including parameters.
+  private boolean fullSignatures;
 
   private boolean passed;
 
-  public Main(boolean secondary, boolean full_signatures) {
-      this.secondary = secondary;
-      this.full_signatures = full_signatures;
+  public Main(String[] args) throws Exception {
+      System.loadLibrary(args[0]);
+      for (String arg : args) {
+          if (arg.equals("--test-local")) {
+              testLocal = true;
+          }
+          if (arg.equals("--test-remote")) {
+              testRemote = true;
+          }
+          if (arg.equals("--secondary")) {
+              secondary = true;
+          }
+          if (arg.equals("--full-signatures")) {
+              fullSignatures = true;
+          }
+      }
+      if (!testLocal && !testRemote) {
+          System.out.println("No test selected.");
+      }
   }
 
   public static void main(String[] args) throws Exception {
-    System.loadLibrary(args[0]);
-      boolean secondary = false;
-      boolean full_signatures = false;
-      for (String arg : args) {
-        if (arg.equals("--secondary")) {
-          secondary = true;
-        }
-        if (arg.equals("--full-signatures")) {
-          full_signatures = true;
-        }
-      }
-      new Main(secondary, full_signatures).run();
+      new Main(args).run();
   }
 
   private void run() {
       if (secondary) {
-          if (!TEST_REMOTE_UNWINDING) {
+          if (!testRemote) {
               throw new RuntimeException("Should not be running secondary!");
           }
           runSecondary();
@@ -73,11 +78,11 @@
 
   private void runPrimary() {
       // First do the in-process unwinding.
-      if (TEST_LOCAL_UNWINDING && !foo()) {
+      if (testLocal && !foo()) {
           System.out.println("Unwinding self failed.");
       }
 
-      if (!TEST_REMOTE_UNWINDING) {
+      if (!testRemote) {
           // Skip the remote step.
           return;
       }
@@ -105,7 +110,7 @@
               throw new RuntimeException(e);
           }
 
-          if (!unwindOtherProcess(full_signatures, pid)) {
+          if (!unwindOtherProcess(fullSignatures, pid)) {
               System.out.println("Unwinding other process failed.");
           }
       } finally {
@@ -163,7 +168,7 @@
       if (b) {
           return sleep(2, b, 1.0);
       } else {
-          return unwindInProcess(full_signatures, 1, b);
+          return unwindInProcess(fullSignatures, 1, b);
       }
   }
 
@@ -171,6 +176,6 @@
 
   public native boolean sleep(int i, boolean b, double dummy);
 
-  public native boolean unwindInProcess(boolean full_signatures, int i, boolean b);
-  public native boolean unwindOtherProcess(boolean full_signatures, int pid);
+  public native boolean unwindInProcess(boolean fullSignatures, int i, boolean b);
+  public native boolean unwindOtherProcess(boolean fullSignatures, int pid);
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index e05d4f5..c4f0171 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -541,7 +541,9 @@
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
 # Tests that should fail in the read barrier configuration with the interpreter.
-TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
+# 145: Test sometimes times out in read barrier configuration (b/27467554).
+TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS := \
+  145-alloc-tracking-stress
 
 # Tests that should fail in the read barrier configuration with the default (Quick) compiler (AOT).
 # Quick has no support for read barriers and punts to the interpreter, so this list is composed of
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 46100ae..fab4599 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -270,5 +270,10 @@
   description: "Only work with --mode=activity",
   result: EXEC_FAILED,
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
+},
+{
+  description: "Temporary suppressing while test is fixed",
+  result: EXEC_FAILED,
+  names: [ "org.apache.harmony.tests.java.util.ArrayDequeTest#test_forEachRemaining_iterator" ]
 }
 ]
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 45b60dc..9e085b5 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -37,6 +37,14 @@
 echo -e "${green}Battery info${nc}"
 adb shell dumpsys battery
 
+echo -e "${green}Setting adb buffer size to 32MB${nc}"
+adb logcat -G 32M
+adb logcat -g
+
+echo -e "${green}Removing adb spam filter${nc}"
+adb logcat -P ""
+adb logcat -p
+
 echo -e "${green}Kill stalled dalvikvm processes${nc}"
 processes=$(adb shell "ps" | grep dalvikvm | awk '{print $2}')
 for i in $processes; do adb shell kill -9 $i; done