Intrinsify Integer.valueOf.

Improves performance of ArrayListStress by ~10% and of Ritz by ~3%.

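Integer.valueOf must return canonical (reference-equal) instances for values
in [low, high], where libcore's Integer$IntegerCache fixes low at -128 and
high at >= 127. The intrinsic embeds the boot-image cache array and bounds
directly in the generated code. As a simplified, illustrative sketch of the
library behavior being fast-pathed (the real logic lives in
java.lang.Integer$IntegerCache):

  // Sketch of java.lang.Integer$IntegerCache, assuming the default bounds.
  final class IntegerCacheSketch {
    static final int LOW = -128;
    static final int HIGH = 127;  // >= 127; may be raised via a system property.
    static final Integer[] CACHE = new Integer[HIGH - LOW + 1];
    static {
      for (int i = 0; i < CACHE.length; i++) {
        CACHE[i] = new Integer(LOW + i);
      }
    }

    static Integer valueOf(int v) {
      if (v >= LOW && v <= HIGH) {
        return CACHE[v - LOW];  // Canonical, reference-equal instance.
      }
      return new Integer(v);    // Uncached values allocate a fresh object.
    }
  }
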
Test: test-art-host test-art-target
Bug: 30933338

Change-Id: I639046e3a18dae50069d3a7ecb538a900bb590a1
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 9bd25d8..63c23cb 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -24,6 +24,10 @@
 // Note: adding a new intrinsic requires an art image version change,
 // as the modifiers flag for some ArtMethods will need to be changed.
 
+// Note: j.l.Integer.valueOf says kNoThrow even though it can throw an OOME.
+// kNoThrow should be renamed kNoVisibleThrow: it is OK to GVN Integer.valueOf
+// (hence kNoSideEffects), and it is also OK to remove the call if its result is unused.
+
 #define INTRINSICS_LIST(V) \
   V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
   V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -149,7 +153,8 @@
   V(UnsafeLoadFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "loadFence", "()V") \
   V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \
   V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
-  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;")
+  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
+  V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;")
 
 #endif  // ART_COMPILER_INTRINSICS_LIST_H_
 #undef ART_COMPILER_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index edccbd4..18c95b3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4094,7 +4094,7 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
   }
@@ -4107,7 +4107,7 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
   }
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 17d683f..a85dd84 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -19,6 +19,7 @@
 #include "art_method.h"
 #include "class_linker.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "invoke_type.h"
 #include "mirror/dex_cache-inl.h"
 #include "nodes.h"
@@ -178,4 +179,110 @@
   return os;
 }
 
+void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
+                                                      CodeGenerator* codegen,
+                                                      Location return_location,
+                                                      Location first_argument_location) {
+  if (Runtime::Current()->IsAotCompiler()) {
+    if (codegen->GetCompilerOptions().IsBootImage() ||
+        codegen->GetCompilerOptions().GetCompilePic()) {
+      // TODO(ngeoffray): Support boot image compilation.
+      return;
+    }
+  }
+
+  IntegerValueOfInfo info = ComputeIntegerValueOfInfo();
+
+  if (info.integer_cache == nullptr ||
+      info.integer == nullptr ||
+      info.cache == nullptr ||
+      info.value_offset == 0 ||
+      // The spec requires low <= -128 and high >= 127, so neither can be 0.
+      info.low == 0 ||
+      info.high == 0) {
+    LOG(ERROR) << "Integer.valueOf will not be optimized";
+    return;
+  }
+
+  // The intrinsic will call into the runtime if it needs to allocate a j.l.Integer.
+  LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary(
+      invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+  if (!invoke->InputAt(0)->IsConstant()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->AddTemp(first_argument_location);
+  locations->SetOut(return_location);
+}
+
+IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() {
+  // Note that we could cache all of the data looked up here, but there's no good
+  // location for it. We don't want to add it to WellKnownClasses, to avoid creating global
+  // JNI values. Adding it as state to the compiler singleton seems like the wrong
+  // separation of concerns.
+  // The need for this data should be pretty rare though.
+
+  // The most common case is that the classes are in the boot image and initialized,
+  // which is easy to generate code for. We bail if not.
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  gc::Heap* heap = runtime->GetHeap();
+  IntegerValueOfInfo info;
+  info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;");
+  if (info.integer_cache == nullptr) {
+    self->ClearException();
+    return info;
+  }
+  if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) {
+    // Optimization only works if the class is initialized and in the boot image.
+    return info;
+  }
+  info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;");
+  if (info.integer == nullptr) {
+    self->ClearException();
+    return info;
+  }
+  if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) {
+    // Optimization only works if the class is initialized and in the boot image.
+    return info;
+  }
+
+  ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
+  if (field == nullptr) {
+    return info;
+  }
+  info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>(
+      field->GetObject(info.integer_cache).Ptr());
+  if (info.cache == nullptr) {
+    return info;
+  }
+
+  if (!heap->ObjectIsInBootImageSpace(info.cache)) {
+    // Optimization only works if the object is in the boot image.
+    return info;
+  }
+
+  field = info.integer->FindDeclaredInstanceField("value", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.value_offset = field->GetOffset().Int32Value();
+
+  field = info.integer_cache->FindDeclaredStaticField("low", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.low = field->GetInt(info.integer_cache);
+
+  field = info.integer_cache->FindDeclaredStaticField("high", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.high = field->GetInt(info.integer_cache);
+
+  DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1);
+  return info;
+}
+
 }  // namespace art
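Each backend's VisitIntegerValueOf below lowers the non-constant case to the
same shape: an unsigned bounds check against the embedded cache, a direct
array load on a hit, and a runtime allocation plus field store on a miss.
A behavioral sketch in Java (LOW, HIGH, and CACHE are hypothetical stand-ins
for the boot-image constants that ComputeIntegerValueOfInfo resolves; this is
not the actual generated code):

  final class LoweredValueOfSketch {
    // Stand-ins for the boot-image constants; defaults assumed.
    static final int LOW = -128;
    static final int HIGH = 127;
    static final Integer[] CACHE = new Integer[HIGH - LOW + 1];
    static {
      for (int i = 0; i < CACHE.length; i++) {
        CACHE[i] = new Integer(LOW + i);
      }
    }

    static Integer valueOf(int in) {
      int index = in - LOW;
      // One unsigned comparison (cmp + b.hs on ARM, jae on x86) covers both
      // bounds at once: 0 <= index < CACHE.length.
      if (Integer.compareUnsigned(index, CACHE.length) < 0) {
        return CACHE[index];  // Direct load from the boot-image cache array.
      }
      // Slow path: kQuickAllocObjectInitialized allocates the object; the
      // compiled code then stores `in` into the value field and issues a
      // StoreStore barrier (value is a final field).
      return new Integer(in);
    }
  }
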
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6425e13..9da5a7f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -113,6 +113,39 @@
     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   }
 
+  static void ComputeIntegerValueOfLocations(HInvoke* invoke,
+                                             CodeGenerator* codegen,
+                                             Location return_location,
+                                             Location first_argument_location);
+
+  // Temporary data structure holding data useful to the Integer.valueOf intrinsic. We
+  // only use it if the mirror::Class* objects are in the boot image, so it is fine to
+  // keep raw mirror::Class pointers in this structure.
+  struct IntegerValueOfInfo {
+    IntegerValueOfInfo()
+        : integer_cache(nullptr),
+          integer(nullptr),
+          cache(nullptr),
+          low(0),
+          high(0),
+          value_offset(0) {}
+
+    // The java.lang.Integer$IntegerCache class.
+    mirror::Class* integer_cache;
+    // The java.lang.Integer class.
+    mirror::Class* integer;
+    // Value of java.lang.Integer$IntegerCache#cache.
+    mirror::ObjectArray<mirror::Object>* cache;
+    // Value of java.lang.Integer$IntegerCache#low.
+    int32_t low;
+    // Value of java.lang.Integer$IntegerCache#high.
+    int32_t high;
+    // The offset of java.lang.Integer#value.
+    int32_t value_offset;
+  };
+
+  static IntegerValueOfInfo ComputeIntegerValueOfInfo();
+
  protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index c262cf9..1808a99 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -129,6 +129,7 @@
 
 IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
     : arena_(codegen->GetGraph()->GetArena()),
+      codegen_(codegen),
       assembler_(codegen->GetAssembler()),
       features_(codegen->GetInstructionSetFeatures()) {}
 
@@ -2644,6 +2645,74 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(R0),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  ArmAssembler* const assembler = GetAssembler();
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ LoadImmediate(IP, value);
+      __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    // Check bounds of our cache.
+    __ AddConstant(out, in, -info.low);
+    __ CmpConstant(out, info.high - info.low + 1);
+    Label allocate, done;
+    __ b(&allocate, HS);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
+    __ b(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 7f20ea4..2840863 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -51,6 +51,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGenerator* codegen_;
   ArmAssembler* assembler_;
 
   const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 86e5429..abb5c0a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2924,6 +2924,78 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  MacroAssembler* masm = GetVIXLAssembler();
+
+  Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
+  UseScratchRegisterScope temps(masm);
+  Register temp = temps.AcquireW();
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ Mov(temp.W(), value);
+      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
+    // Check bounds of our cache.
+    __ Add(out.W(), in.W(), -info.low);
+    __ Cmp(out.W(), info.high - info.low + 1);
+    vixl::aarch64::Label allocate, done;
+    __ B(&allocate, hs);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    MemOperand source = HeapOperand(
+        temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
+    codegen_->Load(Primitive::kPrimNot, out, source);
+    __ B(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 28e41cb..3c53517 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -38,7 +38,8 @@
 
 class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
  public:
-  explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena) : arena_(arena) {}
+  explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen)
+      : arena_(arena), codegen_(codegen) {}
 
   // Define visitor methods.
 
@@ -56,6 +57,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGeneratorARM64* codegen_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 70a3d38..fe05edd 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -203,6 +203,7 @@
 
 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
     : arena_(codegen->GetGraph()->GetArena()),
+      codegen_(codegen),
       assembler_(codegen->GetAssembler()),
       features_(codegen->GetInstructionSetFeatures()) {}
 
@@ -2988,6 +2989,76 @@
   __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      LocationFrom(r0),
+      LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  ArmVIXLAssembler* const assembler = GetAssembler();
+
+  vixl32::Register out = RegisterFrom(locations->Out());
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  vixl32::Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ Mov(temp, value);
+      assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    vixl32::Register in = RegisterFrom(locations->InAt(0));
+    // Check bounds of our cache.
+    __ Add(out, in, -info.low);
+    __ Cmp(out, info.high - info.low + 1);
+    vixl32::Label allocate, done;
+    __ B(hs, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
+    __ B(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 6e79cb7..023cba1 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -47,6 +47,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGenerator* codegen_;
   ArmVIXLAssembler* assembler_;
   const ArmInstructionSetFeatures& features_;
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 64a6840..536b7f6 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2682,6 +2682,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
+
 UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3888828..1112eed 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2075,6 +2075,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
+
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index e1b7ea5..432984e 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3335,6 +3335,64 @@
   __ Bind(intrinsic_slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(EAX),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ movl(out, Immediate(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ movl(Address(out, info.value_offset), Immediate(value));
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    // Check bounds of our cache.
+    __ leal(out, Address(in, -info.low));
+    __ cmpl(out, Immediate(info.high - info.low + 1));
+    NearLabel allocate, done;
+    __ j(kAboveEqual, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ movl(out, Address(out, TIMES_4, data_offset + address));
+    __ jmp(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ movl(Address(out, info.value_offset), in);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 05d270a..57992a9 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2995,6 +2995,64 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(RAX),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ movl(out, Immediate(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ movl(Address(out, info.value_offset), Immediate(value));
+    }
+  } else {
+    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
+    // Check bounds of our cache.
+    __ leal(out, Address(in, -info.low));
+    __ cmpl(out, Immediate(info.high - info.low + 1));
+    NearLabel allocate, done;
+    __ j(kAboveEqual, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ movl(out, Address(out, TIMES_4, data_offset + address));
+    __ jmp(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ movl(Address(out, info.value_offset), in);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8a9e618..c39aed2 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1914,6 +1914,9 @@
 
   virtual bool IsControlFlow() const { return false; }
 
+  // Can the instruction throw?
+// TODO: We should rename this to CanVisiblyThrow, as some instructions (like HNewInstance)
+// can throw an OOME but are still OK to remove if they are unused.
   virtual bool CanThrow() const { return false; }
   bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
 
diff --git a/runtime/image.cc b/runtime/image.cc
index 243051e..88f28f3 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '9', '\0' };  // Enable string compression.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '4', '0', '\0' };  // Integer.valueOf intrinsic.
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/test/640-checker-integer-valueof/expected.txt b/test/640-checker-integer-valueof/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/640-checker-integer-valueof/expected.txt
diff --git a/test/640-checker-integer-valueof/info.txt b/test/640-checker-integer-valueof/info.txt
new file mode 100644
index 0000000..51021a4
--- /dev/null
+++ b/test/640-checker-integer-valueof/info.txt
@@ -0,0 +1 @@
+Test for the Integer.valueOf intrinsic.
diff --git a/test/640-checker-integer-valueof/src/Main.java b/test/640-checker-integer-valueof/src/Main.java
new file mode 100644
index 0000000..0837fd1
--- /dev/null
+++ b/test/640-checker-integer-valueof/src/Main.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: java.lang.Integer Main.foo(int) disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK:                      pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo(int a) {
+    return Integer.valueOf(a);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo2() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK-NOT:                  pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo2() {
+    return Integer.valueOf(-42);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo3() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK-NOT:                  pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo3() {
+    return Integer.valueOf(42);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo4() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK:                      pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo4() {
+    return Integer.valueOf(55555);
+  }
+
+  public static void main(String[] args) {
+    assertEqual("42", foo(intField));
+    assertEqual(foo(intField), foo(intField2));
+    assertEqual("-42", foo2());
+    assertEqual("42", foo3());
+    assertEqual("55555", foo4());
+    assertEqual("55555", foo(intField3));
+    assertEqual("-129", foo(intFieldMinus129));
+    assertEqual("-128", foo(intFieldMinus128));
+    assertEqual(foo(intFieldMinus128), foo(intFieldMinus128));
+    assertEqual("-127", foo(intFieldMinus127));
+    assertEqual(foo(intFieldMinus127), foo(intFieldMinus127));
+    assertEqual("126", foo(intField126));
+    assertEqual(foo(intField126), foo(intField126));
+    assertEqual("127", foo(intField127));
+    assertEqual(foo(intField127), foo(intField127));
+    assertEqual("128", foo(intField128));
+  }
+
+  static void assertEqual(String a, Integer b) {
+    if (!a.equals(b.toString())) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
+  static void assertEqual(Integer a, Integer b) {
+    if (a != b) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
+  static int intField = 42;
+  static int intField2 = 42;
+  static int intField3 = 55555;
+
+  // Edge cases.
+  static int intFieldMinus129 = -129;
+  static int intFieldMinus128 = -128;
+  static int intFieldMinus127 = -127;
+  static int intField126 = 126;
+  static int intField127 = 127;
+  static int intField128 = 128;
+}