Merge "ARM64 read barrier support for concurrent GC in Optimizing."
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e42a737..d67087e 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1114,25 +1114,23 @@
 }
 
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
-  if (IsBootImage() &&
-      IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-          soa.Self(), dex_file, false);
-      mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-      if (resolved_class == nullptr) {
-        // Erroneous class.
-        stats_->TypeNotInDexCache();
-        return false;
-      }
-    }
+  bool result = false;
+  if ((IsBootImage() &&
+       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) ||
+      Runtime::Current()->UseJit()) {
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+        soa.Self(), dex_file, false);
+    mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+    result = (resolved_class != nullptr);
+  }
+
+  if (result) {
     stats_->TypeInDexCache();
-    return true;
   } else {
     stats_->TypeNotInDexCache();
-    return false;
   }
+  return result;
 }
 
 bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index dae785b..d90d610 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -482,6 +482,10 @@
     return &compiled_method_storage_;
   }
 
+  // Can we assume that the klass is loaded?
+  bool CanAssumeClassIsLoaded(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -516,10 +520,6 @@
   bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Can we assume that the klass is loaded?
-  bool CanAssumeClassIsLoaded(mirror::Class* klass)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 32968a5..d7754e8 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -905,14 +905,15 @@
   HLoadClass* load_class = new (arena_) HLoadClass(
       graph_->GetCurrentMethod(),
       type_index,
-      *dex_compilation_unit_->GetDexFile(),
+      outer_dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      /*needs_access_check*/ can_throw);
+      /*needs_access_check*/ can_throw,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
 
   current_block_->AddInstruction(load_class);
   HInstruction* cls = load_class;
-  if (!IsInitialized(resolved_class, type_index)) {
+  if (!IsInitialized(resolved_class)) {
     cls = new (arena_) HClinitCheck(load_class, dex_pc);
     current_block_->AddInstruction(cls);
   }
@@ -929,17 +930,34 @@
   return true;
 }
 
-bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const {
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
   if (cls.Get() == nullptr) {
     return false;
   }
-  if (GetOutermostCompilingClass() == cls.Get()) {
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for the AOT compilation.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
     return true;
   }
-  // TODO: find out why this check is needed.
-  bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-      *outer_compilation_unit_->GetDexFile(), type_index);
-  return cls->IsInitialized() && is_in_dex_cache;
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
 }
 
 HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
@@ -962,6 +980,7 @@
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
       outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
   // The index at which the method's class is stored in the DexCache's type array.
   uint32_t storage_index = DexFile::kDexNoIndex;
@@ -979,36 +998,21 @@
 
   HClinitCheck* clinit_check = nullptr;
 
-  if (!outer_class->IsInterface()
-      && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) {
-    // If the outer class is the declaring class or a subclass
-    // of the declaring class, no class initialization is needed
-    // before the static method call.
-    // Note that in case of inlining, we do not need to add clinit checks
-    // to calls that satisfy this subclass check with any inlined methods. This
-    // will be detected by the optimization passes.
+  if (IsInitialized(resolved_method_class)) {
     *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
   } else if (storage_index != DexFile::kDexNoIndex) {
-    // If the method's class type index is available, check
-    // whether we should add an explicit class initialization
-    // check for its declaring class before the static method call.
-
-    Handle<mirror::Class> cls(hs.NewHandle(resolved_method->GetDeclaringClass()));
-    if (IsInitialized(cls, storage_index)) {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-    } else {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-      HLoadClass* load_class = new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          storage_index,
-          *dex_compilation_unit_->GetDexFile(),
-          is_outer_class,
-          dex_pc,
-          /*needs_access_check*/ false);
-      current_block_->AddInstruction(load_class);
-      clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-      current_block_->AddInstruction(clinit_check);
-    }
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+    current_block_->AddInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    current_block_->AddInstruction(clinit_check);
   }
   return clinit_check;
 }
@@ -1379,18 +1383,21 @@
     }
   }
 
+  bool is_in_cache =
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
-                                                 *dex_compilation_unit_->GetDexFile(),
+                                                 outer_dex_file,
                                                  is_outer_class,
                                                  dex_pc,
-                                                 /*needs_access_check*/ false);
+                                                 /*needs_access_check*/ false,
+                                                 is_in_cache);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
 
   Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
-  if (!IsInitialized(klass, storage_index)) {
+  if (!IsInitialized(klass)) {
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
@@ -1659,19 +1666,20 @@
 
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
   Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
 
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
   HLoadClass* cls = new (arena_) HLoadClass(
       graph_->GetCurrentMethod(),
       type_index,
-      *dex_compilation_unit_->GetDexFile(),
+      dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      !can_access);
+      !can_access,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
   current_block_->AddInstruction(cls);
 
   // The class needs a temporary before being used by the type check.
@@ -2797,10 +2805,11 @@
       current_block_->AddInstruction(new (arena_) HLoadClass(
           graph_->GetCurrentMethod(),
           type_index,
-          *dex_compilation_unit_->GetDexFile(),
+          *dex_file_,
           IsOutermostCompilingClass(type_index),
           dex_pc,
-          !can_access));
+          !can_access,
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 615b0cd..5ada93f 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -311,9 +311,8 @@
   // Build a HNewInstance instruction.
   bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
 
-  // Return whether the compiler can assume `cls` is initialized. `type_index` is the index
-  // of the class in the outer dex file.
-  bool IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const
+  // Return whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArenaAllocator* const arena_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 461319e..a98d9c6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -77,6 +77,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -101,6 +102,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -123,6 +125,7 @@
     SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ b(GetReturnLabel());
@@ -179,6 +182,7 @@
         Primitive::kPrimInt);
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -214,6 +218,11 @@
         ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
         : QUICK_ENTRY_POINT(pInitializeType);
     arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -260,6 +269,7 @@
     __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
 
     RestoreLiveRegisters(codegen, locations);
@@ -351,6 +361,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -393,6 +404,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ b(GetExitLabel());
   }
@@ -2410,6 +2422,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2418,6 +2431,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -2463,6 +2477,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2985,6 +3000,7 @@
         DCHECK_EQ(R0, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
@@ -2999,6 +3015,7 @@
       DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
 
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
@@ -3127,22 +3144,26 @@
         DCHECK_EQ(R1, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
 
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       break;
     }
 
     case Primitive::kPrimFloat: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
 
     case Primitive::kPrimDouble: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
 
@@ -3437,6 +3458,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -3458,6 +3480,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -4947,6 +4970,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -4968,7 +4992,6 @@
       __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
     }
   } else {
-    DCHECK(cls->CanCallRuntime());
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ LoadFromOffset(kLoadWord,
@@ -4987,14 +5010,19 @@
       __ LoadFromOffset(kLoadWord, out, out, cache_offset);
     }
 
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -5107,6 +5135,7 @@
 void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
   codegen_->InvokeRuntime(
       QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5547,6 +5576,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 397b3be..db2defb 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -448,11 +448,11 @@
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
           QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+                           const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
-                           const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -488,6 +488,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -3685,6 +3686,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -3705,7 +3707,6 @@
       __ Ldr(out, MemOperand(current_method, declaring_class_offset));
     }
   } else {
-    DCHECK(cls->CanCallRuntime());
     MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
@@ -3722,14 +3723,19 @@
       __ Ldr(out, MemOperand(out.X(), cache_offset));
     }
 
-    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Cbz(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Cbz(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -3838,7 +3844,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -3927,8 +3937,6 @@
   locations->SetOut(LocationFrom(x0));
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
-  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
-                       void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -3953,7 +3961,6 @@
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -4133,6 +4140,11 @@
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index f3178bd..9dc9167 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -415,13 +415,11 @@
                                   dex_pc,
                                   this,
                                   IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -461,6 +459,7 @@
                                 dex_pc,
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -2638,6 +2637,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2668,8 +2668,7 @@
 
   if (is_volatile && load_type == kLoadDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do implicit Null check
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2692,21 +2691,34 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->Out().IsRegisterPair());
         dst = locations->Out().AsRegisterPairLow<Register>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        if (obj == dst) {
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+        } else {
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+        }
       } else {
         DCHECK(locations->Out().IsRegister());
         dst = locations->Out().AsRegister<Register>();
+        __ LoadFromOffset(load_type, dst, obj, offset);
       }
-      __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->Out().IsFpuRegister());
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadSFromOffset(dst, obj, offset);
       } else {
-        __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadDFromOffset(dst, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // Longs are handled earlier.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   if (is_volatile) {
@@ -2752,6 +2764,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2782,8 +2795,7 @@
 
   if (is_volatile && store_type == kStoreDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do implicit Null check.
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2806,21 +2818,28 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->InAt(1).IsRegisterPair());
         src = locations->InAt(1).AsRegisterPairLow<Register>();
+        Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+        __ StoreToOffset(kStoreWord, src, obj, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
       } else {
         DCHECK(locations->InAt(1).IsRegister());
         src = locations->InAt(1).AsRegister<Register>();
+        __ StoreToOffset(store_type, src, obj, offset);
       }
-      __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->InAt(1).IsFpuRegister());
       FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreSToOffset(src, obj, offset);
       } else {
-        __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreDToOffset(src, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // Longs are handled earlier.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   // TODO: memory barriers?
@@ -3170,6 +3189,7 @@
                             cls->GetDexPc(),
                             nullptr,
                             IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -3181,21 +3201,26 @@
     __ LoadFromOffset(kLoadWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadWord, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
     __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqz(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqz(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -3700,7 +3725,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmodf));
-      CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
     case Primitive::kPrimDouble: {
@@ -3708,7 +3733,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmod));
-      CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
     default:
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 802c435..934f24b 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -366,13 +366,11 @@
                                     instruction_,
                                     dex_pc,
                                     this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -404,6 +402,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -1611,6 +1610,7 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
       break;
     }
@@ -1819,6 +1819,19 @@
                                                      : QUICK_ENTRY_POINT(pCmplDouble);
       }
       codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
+      if (in_type == Primitive::kPrimFloat) {
+        if (instruction->IsGtBias()) {
+          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
+        } else {
+          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
+        }
+      } else {
+        if (instruction->IsGtBias()) {
+          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
+        } else {
+          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
+        }
+      }
       break;
     }
 
@@ -3020,6 +3033,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -3031,22 +3045,26 @@
     __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadDoubleword, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
     __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     // TODO: We will need a read barrier here.
-    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqzc(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqzc(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -3136,7 +3154,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitMul(HMul* mul) {
@@ -3455,6 +3477,11 @@
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
     default:
@@ -3764,6 +3791,11 @@
                               conversion,
                               conversion->GetDexPc(),
                               nullptr);
+      if (result_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      } else {
+        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
@@ -3779,6 +3811,19 @@
                             conversion,
                             conversion->GetDexPc(),
                             nullptr);
+    if (result_type != Primitive::kPrimLong) {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+      }
+    } else {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+      }
+    }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6a9177d..1fc09a8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -67,6 +67,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -93,6 +94,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -152,6 +154,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -177,6 +180,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
@@ -222,6 +226,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
     RestoreLiveRegisters(codegen, locations);
 
@@ -257,6 +262,11 @@
     x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                           : QUICK_ENTRY_POINT(pInitializeType),
                                at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -368,6 +378,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -410,6 +421,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -2460,6 +2472,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2468,6 +2481,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -3298,11 +3312,13 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       } else {
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       }
       break;
     }
@@ -3780,6 +3796,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -3796,13 +3813,13 @@
 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
-
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -5501,6 +5518,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -5522,7 +5540,6 @@
       __ movl(out, Address(current_method, declaring_class_offset));
     }
   } else {
-    DCHECK(cls->CanCallRuntime());
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movl(out, Address(current_method,
@@ -5539,15 +5556,22 @@
       __ movl(out, Address(out, cache_offset));
     }
 
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+
+      if (!cls->IsInDexCache()) {
+        __ testl(out, out);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -5659,6 +5683,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6148,6 +6173,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 8cfd8cb..534ee1c 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -65,6 +65,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -91,6 +92,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -149,6 +151,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
@@ -203,6 +206,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -240,6 +244,11 @@
                                   at_,
                                   dex_pc_,
                                   this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     Location out = locations->Out();
     // Move the class to the desired location.
@@ -290,6 +299,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
@@ -386,6 +396,7 @@
                                   deoptimize,
                                   deoptimize->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -428,6 +439,7 @@
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -3777,6 +3789,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -3795,13 +3808,13 @@
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
                            instruction->GetTypeIndex());
-
   // Note: if heap poisoning is enabled, the entry point takes cares
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -5123,6 +5136,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -5144,7 +5158,6 @@
       __ movl(out, Address(current_method, declaring_class_offset));
     }
   } else {
-    DCHECK(cls->CanCallRuntime());
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movq(out, Address(current_method,
@@ -5161,15 +5174,20 @@
       __ movl(out, Address(out, cache_offset));
     }
 
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ testl(out, out);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -5272,6 +5290,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5766,6 +5785,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 1bd626f..d3f30cb 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -4796,13 +4796,15 @@
              const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc,
-             bool needs_access_check)
+             bool needs_access_check,
+             bool is_in_dex_cache)
       : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
         is_referrers_class_(is_referrers_class),
         generate_clinit_check_(false),
         needs_access_check_(needs_access_check),
+        is_in_dex_cache_(is_in_dex_cache),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
@@ -4827,14 +4829,13 @@
   bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
-    // Will call runtime and load the class if the class is not loaded yet.
-    // TODO: finer grain decision.
-    return !is_referrers_class_;
+    return CanCallRuntime();
   }
 
   bool MustGenerateClinitCheck() const {
     return generate_clinit_check_;
   }
+
   void SetMustGenerateClinitCheck(bool generate_clinit_check) {
     // The entrypoint the code generator is going to call does not do
     // clinit of the class.
@@ -4843,7 +4844,9 @@
   }
 
   bool CanCallRuntime() const {
-    return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_;
+    return MustGenerateClinitCheck() ||
+           (!is_referrers_class_ && !is_in_dex_cache_) ||
+           needs_access_check_;
   }
 
   bool NeedsAccessCheck() const {
@@ -4851,8 +4854,6 @@
   }
 
   bool CanThrow() const OVERRIDE {
-    // May call runtime and and therefore can throw.
-    // TODO: finer grain decision.
     return CanCallRuntime();
   }
 
@@ -4874,6 +4875,8 @@
     return SideEffects::CanTriggerGC();
   }
 
+  bool IsInDexCache() const { return is_in_dex_cache_; }
+
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
@@ -4883,7 +4886,8 @@
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
   bool generate_clinit_check_;
-  bool needs_access_check_;
+  const bool needs_access_check_;
+  const bool is_in_dex_cache_;
 
   ReferenceTypeInfo loaded_class_rti_;
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 0b0f094..571a2f5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -103,6 +103,7 @@
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bf95a0e..c9831e6 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -66,6 +66,11 @@
     }                                                                                           \
   } while (false)
 
+#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+  do { \
+    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -542,6 +547,7 @@
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -551,6 +557,7 @@
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -560,6 +567,7 @@
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -569,6 +577,7 @@
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -578,6 +587,7 @@
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -681,6 +691,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -695,6 +706,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -709,6 +721,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -723,6 +736,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -737,6 +751,7 @@
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -751,6 +766,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -764,6 +780,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -777,6 +794,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -790,6 +808,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +822,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -816,6 +836,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -829,6 +850,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ecbf13c..4660426 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -24,6 +24,8 @@
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
 #include "jit_instrumentation.h"
+#include "oat_file_manager.h"
+#include "offline_profiling_info.h"
 #include "runtime.h"
 #include "runtime_options.h"
 #include "utils.h"
@@ -44,6 +46,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
+  jit_options->save_profiling_info_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);;
   return jit_options;
 }
 
@@ -76,6 +80,10 @@
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
+  jit->offline_profile_info_.reset(nullptr);
+  if (true) {
+    jit->offline_profile_info_.reset(new OfflineProfilingInfo());
+  }
   LOG(INFO) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
@@ -152,6 +160,33 @@
   }
 }
 
+void Jit::SaveProfilingInfo(const std::string& filename) {
+  if (offline_profile_info_ == nullptr) {
+    return;
+  }
+  // Note that we can't check the PrimaryOatFile when constructing the offline_profilie_info_
+  // because it becomes known to the Runtime after we create and initialize the JIT.
+  const OatFile* primary_oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile();
+  if (primary_oat_file == nullptr) {
+    LOG(WARNING) << "Couldn't find a primary oat file when trying to save profile info to "
+                 << filename;
+    return;
+  }
+
+  uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
+  if (offline_profile_info_->NeedsSaving(last_update_ns)) {
+    VLOG(profiler) << "Iniate save profiling information to: " << filename;
+    std::set<ArtMethod*> methods;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      code_cache_->GetCompiledArtMethods(primary_oat_file, methods);
+    }
+    offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods);
+  } else {
+    VLOG(profiler) << "No need to save profiling information to: " << filename;
+  }
+}
+
 Jit::~Jit() {
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index fc76549..630eba3 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -26,6 +26,7 @@
 #include "gc_root.h"
 #include "jni.h"
 #include "object_callbacks.h"
+#include "offline_profiling_info.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -71,6 +72,8 @@
     return instrumentation_cache_.get();
   }
 
+  void SaveProfilingInfo(const std::string& filename);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -90,6 +93,7 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
+  std::unique_ptr<OfflineProfilingInfo> offline_profile_info_;
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
@@ -111,12 +115,18 @@
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  bool GetSaveProfilingInfo() const {
+    return save_profiling_info_;
+  }
   bool UseJIT() const {
     return use_jit_;
   }
   void SetUseJIT(bool b) {
     use_jit_ = b;
   }
+  void SetSaveProfilingInfo(bool b) {
+    save_profiling_info_ = b;
+  }
 
  private:
   bool use_jit_;
@@ -125,13 +135,15 @@
   size_t compile_threshold_;
   size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
+  bool save_profiling_info_;
 
   JitOptions()
       : use_jit_(false),
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
-        dump_info_on_shutdown_(false) { }
+        dump_info_on_shutdown_(false),
+        save_profiling_info_(false) { }
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index da79109..804d69f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "base/time_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "jit/profiling_info.h"
@@ -109,7 +110,8 @@
       current_capacity_(initial_code_capacity + initial_data_capacity),
       code_end_(initial_code_capacity),
       data_end_(initial_data_capacity),
-      has_done_one_collection_(false) {
+      has_done_one_collection_(false),
+      last_update_time_ns_(0) {
 
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
@@ -314,6 +316,7 @@
       // code.
       GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
     }
+    last_update_time_ns_ = NanoTime();
     VLOG(jit)
         << "JIT added "
         << PrettyMethod(method) << "@" << method
@@ -677,5 +680,19 @@
   }
 }
 
+void JitCodeCache::GetCompiledArtMethods(const OatFile* oat_file,
+                                         std::set<ArtMethod*>& methods) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto it : method_code_map_) {
+    if (it.second->GetDexFile()->GetOatDexFile()->GetOatFile() == oat_file) {
+      methods.insert(it.second);
+    }
+  }
+}
+
+uint64_t JitCodeCache::GetLastUpdateTimeNs() {
+  MutexLock mu(Thread::Current(), lock_);
+  return last_update_time_ns_;
+}
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 13481e0..acd7c62 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -139,6 +139,13 @@
 
   void* MoreCore(const void* mspace, intptr_t increment);
 
+  // Adds to `methods` all the compiled ArtMethods which are part of the given `oat_file`.
+  void GetCompiledArtMethods(const OatFile* oat_file, std::set<ArtMethod*>& methods)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -228,6 +235,9 @@
   // Whether a collection has already been done on the current capacity.
   bool has_done_one_collection_ GUARDED_BY(lock_);
 
+  // Last time the the code_cache was updated.
+  uint64_t last_update_time_ns_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
new file mode 100644
index 0000000..4450653
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offline_profiling_info.h"
+
+#include <fstream>
+#include <set>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+
+#include "art_method-inl.h"
+#include "base/mutex.h"
+#include "jit/profiling_info.h"
+#include "safe_map.h"
+#include "utils.h"
+
+namespace art {
+
+// An arbitrary value to throttle save requests. Set to 500ms for now.
+static constexpr const uint64_t kMilisecondsToNano = 1000000;
+static constexpr const uint64_t kMinimumTimeBetweenSavesNs = 500 * kMilisecondsToNano;
+
+bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const {
+  return last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs;
+}
+
+void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename,
+                                             uint64_t last_update_time_ns,
+                                             const std::set<ArtMethod*>& methods) {
+  if (!NeedsSaving(last_update_time_ns)) {
+    VLOG(profiler) << "No need to saved profile info to " << filename;
+    return;
+  }
+
+  if (methods.empty()) {
+    VLOG(profiler) << "No info to save to " << filename;
+    return;
+  }
+
+  DexFileToMethodsMap info;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    for (auto it = methods.begin(); it != methods.end(); it++) {
+      AddMethodInfo(*it, &info);
+    }
+  }
+
+  // This doesn't need locking because we are trying to lock the file for exclusive
+  // access and fail immediately if we can't.
+  if (Serialize(filename, info)) {
+    last_update_time_ns_.StoreRelaxed(last_update_time_ns);
+    VLOG(profiler) << "Successfully saved profile info to "
+                   << filename << " with time stamp: " << last_update_time_ns;
+  }
+}
+
+
+void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
+  DCHECK(method != nullptr);
+  const DexFile* dex_file = method->GetDexFile();
+
+  auto info_it = info->find(dex_file);
+  if (info_it == info->end()) {
+    info_it = info->Put(dex_file, std::set<uint32_t>());
+  }
+  info_it->second.insert(method->GetDexMethodIndex());
+}
+
+static int OpenOrCreateFile(const std::string& filename) {
+  // TODO(calin) allow the shared uid of the app to access the file.
+  int fd = open(filename.c_str(),
+                O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    PLOG(WARNING) << "Failed to open profile file " << filename;
+    return -1;
+  }
+
+  // Lock the file for exclusive access but don't wait if we can't lock it.
+  int err = flock(fd, LOCK_EX | LOCK_NB);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to lock profile file " << filename;
+    return -1;
+  }
+
+  return fd;
+}
+
+static bool CloseDescriptorForFile(int fd, const std::string& filename) {
+  // Now unlock the file, allowing another process in.
+  int err = flock(fd, LOCK_UN);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to unlock profile file " << filename;
+    return false;
+  }
+
+  // Done, close the file.
+  err = ::close(fd);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
+    return false;
+  }
+
+  return true;
+}
+
+static void WriteToFile(int fd, const std::ostringstream& os) {
+  std::string data(os.str());
+  const char *p = data.c_str();
+  size_t length = data.length();
+  do {
+    int n = ::write(fd, p, length);
+    p += n;
+    length -= n;
+  } while (length > 0);
+}
+
+static constexpr char kFieldSeparator = ',';
+static constexpr char kLineSeparator = '\n';
+
+/**
+ * Serialization format:
+ *    multidex_suffix1,dex_location_checksum1,method_id11,method_id12...
+ *    multidex_suffix2,dex_location_checksum2,method_id21,method_id22...
+ * e.g.
+ *    ,131232145,11,23,454,54               -> this is the first dex file, it has no multidex suffix
+ *    :classes5.dex,218490184,39,13,49,1    -> this is the fifth dex file.
+ **/
+bool OfflineProfilingInfo::Serialize(const std::string& filename,
+                                     const DexFileToMethodsMap& info) const {
+  int fd = OpenOrCreateFile(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  // TODO(calin): Merge with a previous existing profile.
+  // TODO(calin): Profile this and see how much memory it takes. If too much,
+  // write to file directly.
+  std::ostringstream os;
+  for (auto it : info) {
+    const DexFile* dex_file = it.first;
+    const std::set<uint32_t>& method_dex_ids = it.second;
+
+    os << DexFile::GetMultiDexSuffix(dex_file->GetLocation())
+        << kFieldSeparator
+        << dex_file->GetLocationChecksum();
+    for (auto method_it : method_dex_ids) {
+      os << kFieldSeparator << method_it;
+    }
+    os << kLineSeparator;
+  }
+
+  WriteToFile(fd, os);
+
+  return CloseDescriptorForFile(fd, filename);
+}
+}  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
new file mode 100644
index 0000000..e3117eb
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+
+#include <set>
+
+#include "atomic.h"
+#include "dex_file.h"
+#include "safe_map.h"
+
+namespace art {
+
+class ArtMethod;
+
+/**
+ * Profiling information in a format that can be serialized to disk.
+ * It is a serialize-friendly format based on information collected
+ * by the interpreter (ProfileInfo).
+ * Currently it stores only the hot compiled methods.
+ */
+class OfflineProfilingInfo {
+ public:
+  bool NeedsSaving(uint64_t last_update_time_ns) const;
+  void SaveProfilingInfo(const std::string& filename,
+                         uint64_t last_update_time_ns,
+                         const std::set<ArtMethod*>& methods);
+
+ private:
+  // Map identifying the location of the profiled methods.
+  // dex_file_ -> [dex_method_index]+
+  using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>;
+
+  void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
+
+  // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a
+  // singe thread?)
+  Atomic<uint64_t> last_update_time_ns_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 415109f..5e3fa19 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -1689,7 +1689,8 @@
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
       const jchar* chars = s->GetValue();
-      ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
+      size_t bytes = CountUtf8Bytes(chars + start, length);
+      ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length);
     }
   }
 
@@ -1772,7 +1773,7 @@
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
     const uint16_t* chars = s->GetValue();
-    ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
+    ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength());
     bytes[byte_count] = '\0';
     return bytes;
   }
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index be869d4..33aca03 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -109,12 +109,17 @@
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
   DCHECK(utf != nullptr);
-  size_t char_count = CountModifiedUtf8Chars(utf);
-  return AllocFromModifiedUtf8(self, char_count, utf);
+  size_t byte_count = strlen(utf);
+  size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
+  return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
+}
+
+String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) {
+  return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                      const char* utf8_data_in) {
+                                      const char* utf8_data_in, int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   SetStringCountVisitor visitor(utf16_length);
   String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
@@ -122,7 +127,7 @@
     return nullptr;
   }
   uint16_t* utf16_data_out = string->GetValue();
-  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
+  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
   return string;
 }
 
@@ -217,7 +222,7 @@
   const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
-  ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
+  ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
   return result;
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 80ebd2c..e2cfb8d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -116,6 +116,10 @@
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+                                       const char* utf8_data_in, int32_t utf8_length)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4c5dc3a..b49d68f 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -562,17 +562,20 @@
 
 /*
  * This is called by the framework when it knows the application directory and
- * process name.  We use this information to start up the sampling profiler for
- * for ART.
+ * process name.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName,
-                                      jstring appDir ATTRIBUTE_UNUSED,
+static void VMRuntime_registerAppInfo(JNIEnv* env,
+                                      jclass clazz ATTRIBUTE_UNUSED,
+                                      jstring pkgName,
+                                      jstring appDir,
                                       jstring procName ATTRIBUTE_UNUSED) {
-  const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
-  std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
+  const char* appDirChars = env->GetStringUTFChars(appDir, nullptr);
+  const char* pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
+  std::string profileFile = StringPrintf("%s/code_cache/%s.prof", appDirChars, pkgNameChars);
 
-  Runtime::Current()->StartProfiler(profileFile.c_str());
+  Runtime::Current()->SetJitProfilingFilename(profileFile.c_str());
 
+  env->ReleaseStringUTFChars(appDir, appDirChars);
   env->ReleaseStringUTFChars(pkgName, pkgNameChars);
 }
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index dfd783b..585c7c4 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -164,6 +164,9 @@
       .Define("-Xjitwarmupthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITWarmupThreshold)
+      .Define("-Xjitsaveprofilinginfo")
+          .WithValue(true)
+          .IntoKey(M::JITSaveProfilingInfo)
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a210aa8..931e581 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -218,6 +218,9 @@
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
+
+  MaybeSaveJitProfilingInfo();
+
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -601,7 +604,6 @@
       LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
       return true;
     }
-    StartProfiler(profile_output_filename_.c_str());
   }
 
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
@@ -1618,10 +1620,8 @@
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
-void Runtime::StartProfiler(const char* profile_output_filename) {
+void Runtime::SetJitProfilingFilename(const char* profile_output_filename) {
   profile_output_filename_ = profile_output_filename;
-  profiler_started_ =
-      BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_);
 }
 
 // Transaction support.
@@ -1767,8 +1767,16 @@
   argv->push_back(feature_string);
 }
 
+void Runtime::MaybeSaveJitProfilingInfo() {
+  if (jit_.get() != nullptr && !profile_output_filename_.empty()) {
+    jit_->SaveProfilingInfo(profile_output_filename_);
+  }
+}
+
 void Runtime::UpdateProfilerState(int state) {
-  VLOG(profiler) << "Profiler state updated to " << state;
+  if (state == kProfileBackground) {
+    MaybeSaveJitProfilingInfo();
+  }
 }
 
 void Runtime::CreateJit() {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d61663c..bd36414 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -457,7 +457,7 @@
     return &instrumentation_;
   }
 
-  void StartProfiler(const char* profile_output_filename);
+  void SetJitProfilingFilename(const char* profile_output_filename);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -608,12 +608,14 @@
   void StartDaemonThreads();
   void StartSignalCatcher();
 
+  void MaybeSaveJitProfilingInfo();
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
   // NOTE: these must match the gc::ProcessState values as they come directly from the framework.
   static constexpr int kProfileForground = 0;
-  static constexpr int kProfileBackgrouud = 1;
+  static constexpr int kProfileBackground = 1;
 
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
   uint64_t callee_save_methods_[kLastCalleeSaveType];
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 9051eda..5624285 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -71,6 +71,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
+RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 7ac17b6..4e62dda 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -92,7 +92,7 @@
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
   }
-  iterator Put(const K& k, const V&& v) {
+  iterator Put(const K& k, V&& v) {
     std::pair<iterator, bool> result = map_.emplace(k, std::move(v));
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
@@ -105,7 +105,7 @@
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
     return map_.emplace_hint(pos, k, v);
   }
-  iterator PutBefore(iterator pos, const K& k, const V&& v) {
+  iterator PutBefore(iterator pos, const K& k, V&& v) {
     // Check that we're using the correct position and the key is not in the map.
     DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 10600e2..5a11698 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -23,28 +23,50 @@
 
 namespace art {
 
+// This is used only from debugger and test code.
 size_t CountModifiedUtf8Chars(const char* utf8) {
+  return CountModifiedUtf8Chars(utf8, strlen(utf8));
+}
+
+/*
+ * This does not validate UTF8 rules (nor did older code). But it gets the right answer
+ * for valid UTF-8 and that's fine because it's used only to size a buffer for later
+ * conversion.
+ *
+ * Modified UTF-8 consists of a series of bytes up to 21 bit Unicode code points as follows:
+ * U+0001  - U+007F   0xxxxxxx
+ * U+0080  - U+07FF   110xxxxx 10xxxxxx
+ * U+0800  - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
+ * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * U+0000 is encoded using the 2nd form to avoid nulls inside strings (this differs from
+ * standard UTF-8).
+ * The four byte encoding converts to two utf16 characters.
+ */
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) {
+  DCHECK_LE(byte_count, strlen(utf8));
   size_t len = 0;
-  int ic;
-  while ((ic = *utf8++) != '\0') {
+  const char* end = utf8 + byte_count;
+  for (; utf8 < end; ++utf8) {
+    int ic = *utf8;
     len++;
-    if ((ic & 0x80) == 0) {
-      // one-byte encoding
+    if (LIKELY((ic & 0x80) == 0)) {
+      // One-byte encoding.
       continue;
     }
-    // two- or three-byte encoding
+    // Two- or three-byte encoding.
     utf8++;
     if ((ic & 0x20) == 0) {
-      // two-byte encoding
+      // Two-byte encoding.
       continue;
     }
     utf8++;
     if ((ic & 0x10) == 0) {
-      // three-byte encoding
+      // Three-byte encoding.
       continue;
     }
 
-    // four-byte encoding: needs to be converted into a surrogate
+    // Four-byte encoding: needs to be converted into a surrogate
     // pair.
     utf8++;
     len++;
@@ -52,6 +74,7 @@
   return len;
 }
 
+// This is used only from debugger and test code.
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
   while (*utf8_data_in != '\0') {
     const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in);
@@ -65,13 +88,53 @@
   }
 }
 
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars,
+                                const char* utf8_data_in, size_t in_bytes) {
+  const char *in_start = utf8_data_in;
+  const char *in_end = utf8_data_in + in_bytes;
+  uint16_t *out_p = utf16_data_out;
+
+  if (LIKELY(out_chars == in_bytes)) {
+    // Common case where all characters are ASCII.
+    for (const char *p = in_start; p < in_end;) {
+      // Safe even if char is signed because ASCII characters always have
+      // the high bit cleared.
+      *out_p++ = dchecked_integral_cast<uint16_t>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
+  for (const char *p = in_start; p < in_end;) {
+    const uint32_t ch = GetUtf16FromUtf8(&p);
+    const uint16_t leading = GetLeadingUtf16Char(ch);
+    const uint16_t trailing = GetTrailingUtf16Char(ch);
+
+    *out_p++ = leading;
+    if (trailing != 0) {
+      *out_p++ = trailing;
+    }
+  }
+}
+
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count) {
+  if (LIKELY(byte_count == char_count)) {
+    // Common case where all characters are ASCII.
+    const uint16_t *utf16_end = utf16_in + char_count;
+    for (const uint16_t *p = utf16_in; p < utf16_end;) {
+      *utf8_out++ = dchecked_integral_cast<char>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
   while (char_count--) {
     const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
-      // char_count == 0 here implies we've encountered an unpaired
+      // Char_count == 0 here implies we've encountered an unpaired
       // surrogate and we have no choice but to encode it as 3-byte UTF
       // sequence. Note that unpaired surrogates can occur as a part of
       // "normal" operation.
@@ -161,34 +224,31 @@
 
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
-  while (char_count--) {
+  const uint16_t *end = chars + char_count;
+  while (chars < end) {
     const uint16_t ch = *chars++;
-    if (ch > 0 && ch <= 0x7f) {
-      ++result;
-    } else if (ch >= 0xd800 && ch <= 0xdbff) {
-      if (char_count > 0) {
+    if (LIKELY(ch != 0 && ch < 0x80)) {
+      result++;
+      continue;
+    }
+    if (ch < 0x800) {
+      result += 2;
+      continue;
+    }
+    if (ch >= 0xd800 && ch < 0xdc00) {
+      if (chars < end) {
         const uint16_t ch2 = *chars;
         // If we find a properly paired surrogate, we emit it as a 4 byte
         // UTF sequence. If we find an unpaired leading or trailing surrogate,
         // we emit it as a 3 byte sequence like would have done earlier.
-        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+        if (ch2 >= 0xdc00 && ch2 < 0xe000) {
           chars++;
-          char_count--;
-
           result += 4;
-        } else {
-          result += 3;
+          continue;
         }
-      } else {
-        // This implies we found an unpaired trailing surrogate at the end
-        // of a string.
-        result += 3;
       }
-    } else if (ch > 0x7ff) {
-      result += 3;
-    } else {
-      result += 2;
     }
+    result += 3;
   }
   return result;
 }
diff --git a/runtime/utf.h b/runtime/utf.h
index 1193d29..03158c4 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -40,6 +40,7 @@
  * Returns the number of UTF-16 characters in the given modified UTF-8 string.
  */
 size_t CountModifiedUtf8Chars(const char* utf8);
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count);
 
 /*
  * Returns the number of modified UTF-8 bytes needed to represent the given
@@ -51,6 +52,8 @@
  * Convert from Modified UTF-8 to UTF-16.
  */
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in);
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars,
+                                const char* utf8_in, size_t in_bytes);
 
 /*
  * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner
@@ -71,7 +74,8 @@
  * this anyway, so if you want a NUL-terminated string, you know where to
  * put the NUL byte.
  */
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count);
 
 /*
  * The java.lang.String hashCode() algorithm.
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 94a6ea5..5239e40 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,7 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <map>
 #include <vector>
 
 namespace art {
@@ -48,7 +49,7 @@
 };
 
 // A test string that contains a UTF-8 encoding of a surrogate pair
-// (code point = U+10400)
+// (code point = U+10400).
 static const uint8_t kSurrogateEncoding[] = {
     0xed, 0xa0, 0x81,
     0xed, 0xb0, 0x80,
@@ -66,13 +67,13 @@
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(1, ptr, start);
 
-  // Two byte sequence
+  // Two byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(3, ptr, start);
 
-  // Three byte sequence
+  // Three byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -84,7 +85,7 @@
   EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(10, ptr, start);
 
-  // Null terminator
+  // Null terminator.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -117,7 +118,8 @@
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
-  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(),
+                             &input[0], input.size());
   EXPECT_EQ(expected, output);
 }
 
@@ -139,10 +141,10 @@
   AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
 
   AssertConversion({
-      0xd802, 0xdc02,  // Surrogate pair
-      0xdef0, 0xdcff,  // Three byte encodings
-      0x0101, 0x0000,  // Two byte encodings
-      'p'   , 'p'      // One byte encoding
+      0xd802, 0xdc02,  // Surrogate pair.
+      0xdef0, 0xdcff,  // Three byte encodings.
+      0x0101, 0x0000,  // Two byte encodings.
+      'p'   , 'p'      // One byte encoding.
     }, {
       0xf0, 0x90, 0xa0, 0x82,
       0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
@@ -155,9 +157,225 @@
   // Unpaired trailing surrogate at the end of input.
   AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
   // Unpaired (or incorrectly paired) surrogates in the middle of the input.
-  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes {
+      {{ 'h' }, { 'h' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x81 }, { 0xc2, 0x81 }},
+      {{ 0x801 }, { 0xe0, 0xa0, 0x81 }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes {
+      {{ 'e' }, { 'e' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x7ff }, { 0xdf, 0xbf }},
+      {{ 0xffff }, { 0xef, 0xbf, 0xbf }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests {
+      {{ 0xd801 }, { 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }},
+      {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }},
+  };
+  for (const auto& prefix : prefixes) {
+    const std::vector<uint16_t>& prefix_in = prefix.first;
+    const std::vector<uint8_t>& prefix_out = prefix.second;
+    for (const auto& test : tests) {
+      const std::vector<uint16_t>& test_in = test.first;
+      const std::vector<uint8_t>& test_out = test.second;
+      for (const auto& suffix : suffixes) {
+        const std::vector<uint16_t>& suffix_in = suffix.first;
+        const std::vector<uint8_t>& suffix_out = suffix.second;
+        std::vector<uint16_t> in = prefix_in;
+        in.insert(in.end(), test_in.begin(), test_in.end());
+        in.insert(in.end(), suffix_in.begin(), suffix_in.end());
+        std::vector<uint8_t> out = prefix_out;
+        out.insert(out.end(), test_out.begin(), test_out.end());
+        out.insert(out.end(), suffix_out.begin(), suffix_out.end());
+        AssertConversion(in, out);
+      }
+    }
+  }
+}
+
+// Old versions of functions, here to compare answers with optimized versions.
+
+size_t CountModifiedUtf8Chars_reference(const char* utf8) {
+  size_t len = 0;
+  int ic;
+  while ((ic = *utf8++) != '\0') {
+    len++;
+    if ((ic & 0x80) == 0) {
+      // one-byte encoding
+      continue;
+    }
+    // two- or three-byte encoding
+    utf8++;
+    if ((ic & 0x20) == 0) {
+      // two-byte encoding
+      continue;
+    }
+    utf8++;
+    if ((ic & 0x10) == 0) {
+      // three-byte encoding
+      continue;
+    }
+
+    // four-byte encoding: needs to be converted into a surrogate
+    // pair.
+    utf8++;
+    len++;
+  }
+  return len;
+}
+
+static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) {
+  size_t result = 0;
+  while (char_count--) {
+    const uint16_t ch = *chars++;
+    if (ch > 0 && ch <= 0x7f) {
+      ++result;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence like would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
+      } else {
+        // This implies we found an unpaired trailing surrogate at the end
+        // of a string.
+        result += 3;
+      }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
+    }
+  }
+  return result;
+}
+
+static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in,
+                                                 size_t char_count) {
+  while (char_count--) {
+    const uint16_t ch = *utf16_in++;
+    if (ch > 0 && ch <= 0x7f) {
+      *utf8_out++ = ch;
+    } else {
+      // Char_count == 0 here implies we've encountered an unpaired
+      // surrogate and we have no choice but to encode it as 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as a part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
+      if (ch > 0x07ff) {
+        // Three byte encoding.
+        *utf8_out++ = (ch >> 12) | 0xe0;
+        *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
+        *utf8_out++ = (ch >> 6) | 0xc0;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      }
+    }
+  }
+}
+
+// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again.
+
+static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint16_t &second) {
+  first = (code_point >> 10) + 0xd7c0;
+  second = (code_point & 0x03ff) + 0xdc00;
+}
+
+static void testConversions(uint16_t *buf, int char_count) {
+  char bytes_test[8], bytes_reference[8];
+  uint16_t out_buf_test[4], out_buf_reference[4];
+  int byte_count_test, byte_count_reference;
+  int char_count_test, char_count_reference;
+
+  // Calculate the number of utf-8 bytes for the utf-16 chars.
+  byte_count_reference = CountUtf8Bytes_reference(buf, char_count);
+  byte_count_test = CountUtf8Bytes(buf, char_count);
+  EXPECT_EQ(byte_count_reference, byte_count_test);
+
+  // Convert the utf-16 string to utf-8 bytes.
+  ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
+  ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
+  for (int i = 0; i < byte_count_test; ++i) {
+    EXPECT_EQ(bytes_reference[i], bytes_test[i]);
+  }
+
+  // Calculate the number of utf-16 chars from the utf-8 bytes.
+  bytes_reference[byte_count_reference] = 0;  // Reference function needs null termination.
+  char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+  char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+  EXPECT_EQ(char_count, char_count_reference);
+  EXPECT_EQ(char_count, char_count_test);
+
+  // Convert the utf-8 bytes back to utf-16 chars.
+  // Does not need copied _reference version of the function because the original
+  // function with the old API is retained for debug/testing code.
+  ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
+  ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
+  for (int i = 0; i < char_count_test; ++i) {
+    EXPECT_EQ(buf[i], out_buf_reference[i]);
+    EXPECT_EQ(buf[i], out_buf_test[i]);
+  }
+}
+
+TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) {
+  for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) {
+    uint16_t buf[4];
+    if (codePoint <= 0xffff) {
+      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
+        // According to the Unicode standard, no character will ever
+        // be assigned to these code points, and they can not be encoded
+        // into either utf-16 or utf-8.
+        continue;
+      }
+      buf[0] = 'h';
+      buf[1] = codePoint;
+      buf[2] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+    } else {
+      buf[0] = 'h';
+      codePointToSurrogatePair(codePoint, buf[1], buf[2]);
+      buf[3] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf, 4);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+      testConversions(buf + 1, 3);
+    }
+  }
 }
 
 }  // namespace art
diff --git a/test/551-checker-clinit/expected.txt b/test/551-checker-clinit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-checker-clinit/expected.txt
diff --git a/test/551-checker-clinit/info.txt b/test/551-checker-clinit/info.txt
new file mode 100644
index 0000000..4d54bb5
--- /dev/null
+++ b/test/551-checker-clinit/info.txt
@@ -0,0 +1 @@
+Checker test to ensure we optimize aways HClinitChecks as expected.
diff --git a/test/551-checker-clinit/src/Main.java b/test/551-checker-clinit/src/Main.java
new file mode 100644
index 0000000..5ec30480
--- /dev/null
+++ b/test/551-checker-clinit/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {}
+  public static int foo = 42;
+
+  /// CHECK-START: void Main.inlinedMethod() builder (after)
+  /// CHECK:                        ClinitCheck
+
+  /// CHECK-START: void Main.inlinedMethod() inliner (after)
+  /// CHECK:                        ClinitCheck
+  /// CHECK-NOT:                    ClinitCheck
+  /// CHECK-NOT:                    InvokeStaticOrDirect
+  public void inlinedMethod() {
+    SubSub.bar();
+  }
+}
+
+class Sub extends Main {
+  /// CHECK-START: void Sub.invokeSuperClass() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeSuperClass() {
+    int a = Main.foo;
+  }
+
+  /// CHECK-START: void Sub.invokeItself() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeItself() {
+    int a = foo;
+  }
+
+  /// CHECK-START: void Sub.invokeSubClass() builder (after)
+  /// CHECK:                            ClinitCheck
+  public void invokeSubClass() {
+    int a = SubSub.foo;
+  }
+
+  public static int foo = 42;
+}
+
+class SubSub {
+  public static void bar() {
+    int a = Main.foo;
+  }
+  public static int foo = 42;
+}
diff --git a/test/551-implicit-null-checks/expected.txt b/test/551-implicit-null-checks/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-implicit-null-checks/expected.txt
diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt
new file mode 100644
index 0000000..bdd066b
--- /dev/null
+++ b/test/551-implicit-null-checks/info.txt
@@ -0,0 +1 @@
+Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file
diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java
new file mode 100644
index 0000000..677e8d3
--- /dev/null
+++ b/test/551-implicit-null-checks/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private class Inner {
+    private long i1;
+  }
+  private Inner inst;
+
+  public static void main(String args[]) throws Exception {
+    Main m = new Main();
+    try {
+      m.$opt$noinline$testGetLong();
+    } catch (NullPointerException ex) {
+      // good
+    }
+    try {
+      m.$opt$noinline$testPutLong(778899112233L);
+    } catch (NullPointerException ex) {
+      // good
+    }
+  }
+
+  public void $opt$noinline$testGetLong() throws Exception {
+    long result = inst.i1;
+    throw new Exception();  // prevent inline
+  }
+
+  public void $opt$noinline$testPutLong(long a) throws Exception {
+    inst.i1 = a;
+    throw new Exception();  // prevent inline
+  }
+}