Merge "ART: Compress LengthPrefixedArray on 32-bit targets."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 3a1bd09..9f15294 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -275,6 +275,16 @@
   -DVIXL_DEBUG \
   -UNDEBUG
 
+# The latest clang update trips over many of the files in art and never finishes
+# compiling for aarch64 with -O3 (or -O2). Drop back to -O1 while we investigate
+# to stop punishing the build server.
+ifeq ($(TARGET_ARCH),arm64)
+  ifeq ($(USE_CLANG_PLATFORM_BUILD),true)
+    art_debug_cflags += -O1
+    art_non_debug_cflags += -O1
+  endif
+endif
+
 art_host_non_debug_cflags := $(art_non_debug_cflags)
 art_target_non_debug_cflags := $(art_non_debug_cflags)
 
@@ -299,6 +309,16 @@
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
 ART_HOST_ASFLAGS += $(art_asflags)
 
+# Disable -Wpessimizing-move: triggered for art/runtime/base/variant_map.h:261
+# Adding this flag to art_clang_cflags doesn't work because -Wall gets added to
+# ART_HOST_CFLAGS (as a part of art_cflags) after
+# -Wno-pessimizing-move.  Instead, add the flag here to both
+# ART_TARGET_CLANG_CFLAGS and ART_HOST_CFLAGS
+ifeq ($(ART_HOST_CLANG),true)
+ART_HOST_CFLAGS += -Wno-pessimizing-move
+endif
+ART_TARGET_CLANG_CFLAGS += -Wno-pessimizing-move
+
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 3c8be27..4471d71 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -945,6 +945,97 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+
+  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
+  locations->AddTemp(Location::RegisterLocation(ECX));
+  locations->AddTemp(Location::RegisterLocation(EDI));
+
+  // Set output, ESI needed for repe_cmpsl instruction anyways.
+  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register ecx = locations->GetTemp(0).AsRegister<Register>();
+  Register edi = locations->GetTemp(1).AsRegister<Register>();
+  Register esi = locations->Out().AsRegister<Register>();
+
+  Label end;
+  Label return_true;
+  Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check if input is null, return false if it is.
+  __ testl(arg, arg);
+  __ j(kEqual, &return_false);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ movl(ecx, Address(str, class_offset));
+  __ cmpl(ecx, Address(arg, class_offset));
+  __ j(kNotEqual, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ cmpl(str, arg);
+  __ j(kEqual, &return_true);
+
+  // Load length of receiver string.
+  __ movl(ecx, Address(str, count_offset));
+  // Check if lengths are equal, return false if they're not.
+  __ cmpl(ecx, Address(arg, count_offset));
+  __ j(kNotEqual, &return_false);
+  // Return true if both strings are empty.
+  __ testl(ecx, ecx);
+  __ j(kEqual, &return_true);
+
+  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
+  __ leal(esi, Address(str, value_offset));
+  __ leal(edi, Address(arg, value_offset));
+
+  // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
+  __ addl(ecx, Immediate(1));
+  __ shrl(ecx, Immediate(1));
+
+  // Assertions that must hold in order to compare strings 2 characters at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings two characters at a time starting at the beginning of the string.
+  __ repe_cmpsl();
+  // If strings are not equal, zero flag will be cleared.
+  __ j(kNotEqual, &return_false);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ movl(esi, Immediate(1));
+  __ jmp(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ xorl(esi, esi);
+  __ Bind(&end);
+}
+
 static void CreateStringIndexOfLocations(HInvoke* invoke,
                                          ArenaAllocator* allocator,
                                          bool start_at_zero) {
@@ -1758,7 +1849,6 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index b4926c2..9ea68ec 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -854,6 +854,97 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+
+  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
+  locations->AddTemp(Location::RegisterLocation(RCX));
+  locations->AddTemp(Location::RegisterLocation(RDI));
+
+  // Set output, RSI needed for repe_cmpsq instruction anyways.
+  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
+  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
+
+  Label end;
+  Label return_true;
+  Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check if input is null, return false if it is.
+  __ testl(arg, arg);
+  __ j(kEqual, &return_false);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ movl(rcx, Address(str, class_offset));
+  __ cmpl(rcx, Address(arg, class_offset));
+  __ j(kNotEqual, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ cmpl(str, arg);
+  __ j(kEqual, &return_true);
+
+  // Load length of receiver string.
+  __ movl(rcx, Address(str, count_offset));
+  // Check if lengths are equal, return false if they're not.
+  __ cmpl(rcx, Address(arg, count_offset));
+  __ j(kNotEqual, &return_false);
+  // Return true if both strings are empty.
+  __ testl(rcx, rcx);
+  __ j(kEqual, &return_true);
+
+  // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
+  __ leal(rsi, Address(str, value_offset));
+  __ leal(rdi, Address(arg, value_offset));
+
+  // Divide string length by 4 and adjust for lengths not divisible by 4.
+  __ addl(rcx, Immediate(3));
+  __ shrl(rcx, Immediate(2));
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
+
+  // Loop to compare strings four characters at a time starting at the beginning of the string.
+  __ repe_cmpsq();
+  // If strings are not equal, zero flag will be cleared.
+  __ j(kNotEqual, &return_false);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ movl(rsi, Immediate(1));
+  __ jmp(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ xorl(rsi, rsi);
+  __ Bind(&end);
+}
+
 static void CreateStringIndexOfLocations(HInvoke* invoke,
                                          ArenaAllocator* allocator,
                                          bool start_at_zero) {
@@ -1607,7 +1698,6 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 8c2a3ed..9b3d792 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -158,6 +158,20 @@
   EmitUint8(0xC8 + dst);
 }
 
+void X86Assembler::bsrl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBD);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::bsrl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBD);
+  EmitOperand(dst, src);
+}
+
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1552,6 +1566,14 @@
 }
 
 
+void X86Assembler::rep_movsw() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0xF3);
+  EmitUint8(0xA5);
+}
+
+
 X86Assembler* X86Assembler::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF0);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 37c69fe..a9227f3 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -234,6 +234,8 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+  void bsrl(Register dst, Register src);
+  void bsrl(Register dst, const Address& src);
 
   void movzxb(Register dst, ByteRegister src);
   void movzxb(Register dst, const Address& src);
@@ -470,6 +472,7 @@
   void repne_scasw();
   void repe_cmpsw();
   void repe_cmpsl();
+  void rep_movsw();
 
   X86Assembler* lock();
   void cmpxchgl(const Address& address, Register reg);
@@ -649,7 +652,6 @@
   void EmitComplex(int rm, const Operand& operand, const Immediate& immediate);
   void EmitLabel(Label* label, int instruction_size);
   void EmitLabelLink(Label* label);
-  void EmitNearLabelLink(Label* label);
 
   void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
   void EmitGenericShift(int rm, const Operand& operand, Register shifter);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index b664d23..731b5f4 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -218,4 +218,29 @@
   DriverStr(expected, "Repecmpsl");
 }
 
+TEST_F(AssemblerX86Test, RepneScasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "repne_scasw");
+}
+
+TEST_F(AssemblerX86Test, RepMovsw) {
+  GetAssembler()->rep_movsw();
+  const char* expected = "rep movsw\n";
+  DriverStr(expected, "rep_movsw");
+}
+
+TEST_F(AssemblerX86Test, Bsrl) {
+  DriverStr(RepeatRR(&x86::X86Assembler::bsrl, "bsrl %{reg2}, %{reg1}"), "bsrl");
+}
+
+TEST_F(AssemblerX86Test, BsrlAddress) {
+  GetAssembler()->bsrl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "bsrl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "bsrl_address");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 22e7b9b..dc61c99 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2006,6 +2006,14 @@
 }
 
 
+void X86_64Assembler::rep_movsw() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0xF3);
+  EmitUint8(0xA5);
+}
+
+
 X86_64Assembler* X86_64Assembler::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF0);
@@ -2084,6 +2092,37 @@
   EmitUint8(0xC8 + dst.LowBits());
 }
 
+void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBD);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBD);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBD);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBD);
+  EmitOperand(dst.LowBits(), src);
+}
 
 void X86_64Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index beca037..da42213 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -606,10 +606,16 @@
   void bswapl(CpuRegister dst);
   void bswapq(CpuRegister dst);
 
+  void bsrl(CpuRegister dst, CpuRegister src);
+  void bsrl(CpuRegister dst, const Address& src);
+  void bsrq(CpuRegister dst, CpuRegister src);
+  void bsrq(CpuRegister dst, const Address& src);
+
   void repne_scasw();
   void repe_cmpsw();
   void repe_cmpsl();
   void repe_cmpsq();
+  void rep_movsw();
 
   //
   // Macros for High-level operations.
@@ -803,7 +809,6 @@
   void EmitComplex(uint8_t rm, const Operand& operand, const Immediate& immediate);
   void EmitLabel(Label* label, int instruction_size);
   void EmitLabelLink(Label* label);
-  void EmitNearLabelLink(Label* label);
 
   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
   void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 296487e..8673f03 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -836,6 +836,18 @@
   DriverStr(expected, "xorq");
 }
 
+TEST_F(AssemblerX86_64Test, RepneScasw) {
+  GetAssembler()->repne_scasw();
+  const char* expected = "repne scasw\n";
+  DriverStr(expected, "repne_scasw");
+}
+
+TEST_F(AssemblerX86_64Test, RepMovsw) {
+  GetAssembler()->rep_movsw();
+  const char* expected = "rep movsw\n";
+  DriverStr(expected, "rep_movsw");
+}
+
 TEST_F(AssemblerX86_64Test, Movsxd) {
   DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd");
 }
@@ -1129,6 +1141,44 @@
   DriverStr(RepeatR(&x86_64::X86_64Assembler::bswapq, "bswap %{reg}"), "bswapq");
 }
 
+TEST_F(AssemblerX86_64Test, Bsrl) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::bsrl, "bsrl %{reg2}, %{reg1}"), "bsrl");
+}
+
+TEST_F(AssemblerX86_64Test, BsrlAddress) {
+  GetAssembler()->bsrl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsrl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsrl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "bsrl 0xc(%RDI,%RBX,4), %R10d\n"
+    "bsrl 0xc(%R10,%RBX,4), %edi\n"
+    "bsrl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "bsrl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Bsrq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::bsrq, "bsrq %{reg2}, %{reg1}"), "bsrq");
+}
+
+TEST_F(AssemblerX86_64Test, BsrqAddress) {
+  GetAssembler()->bsrq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsrq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsrq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "bsrq 0xc(%RDI,%RBX,4), %R10\n"
+    "bsrq 0xc(%R10,%RBX,4), %RDI\n"
+    "bsrq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "bsrq_address");
+}
+
 std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
                           x86_64::X86_64Assembler* assembler) {
   // From Condition
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 84c465f..282db5d 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -38,11 +38,14 @@
 #include <inttypes.h>
 #include <stdio.h>
 
+#include <iostream>
 #include <memory>
+#include <sstream>
 #include <vector>
 
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
+#include "utils.h"
 
 namespace art {
 
@@ -1046,6 +1049,49 @@
 }
 
 /*
+ * Dumping a CFG. Note that this will do duplicate work. utils.h doesn't expose the code-item
+ * version, so the DumpMethodCFG code will have to iterate again to find it. But dexdump is a
+ * tool, so this is not performance-critical.
+ */
+
+static void dumpCfg(const DexFile* dex_file,
+                    uint32_t dex_method_idx,
+                    const DexFile::CodeItem* code_item) {
+  if (code_item != nullptr) {
+    std::ostringstream oss;
+    DumpMethodCFG(dex_file, dex_method_idx, oss);
+    fprintf(gOutFile, "%s", oss.str().c_str());
+  }
+}
+
+static void dumpCfg(const DexFile* dex_file, int idx) {
+  const DexFile::ClassDef& class_def = dex_file->GetClassDef(idx);
+  const uint8_t* class_data = dex_file->GetClassData(class_def);
+  if (class_data == nullptr) {  // empty class such as a marker interface?
+    return;
+  }
+  ClassDataItemIterator it(*dex_file, class_data);
+  while (it.HasNextStaticField()) {
+    it.Next();
+  }
+  while (it.HasNextInstanceField()) {
+    it.Next();
+  }
+  while (it.HasNextDirectMethod()) {
+    dumpCfg(dex_file,
+            it.GetMemberIndex(),
+            it.GetMethodCodeItem());
+    it.Next();
+  }
+  while (it.HasNextVirtualMethod()) {
+    dumpCfg(dex_file,
+                it.GetMemberIndex(),
+                it.GetMethodCodeItem());
+    it.Next();
+  }
+}
+
+/*
  * Dumps the class.
  *
  * Note "idx" is a DexClassDef index, not a DexTypeId index.
@@ -1061,6 +1107,11 @@
     return;
   }
 
+  if (gOptions.cfg) {
+    dumpCfg(pDexFile, idx);
+    return;
+  }
+
   // For the XML output, show the package name.  Ideally we'd gather
   // up the classes, sort them, and dump them alphabetically so the
   // package name wouldn't jump around, but that's not a great plan
diff --git a/dexdump/dexdump.h b/dexdump/dexdump.h
index f2cd16a..50280a9 100644
--- a/dexdump/dexdump.h
+++ b/dexdump/dexdump.h
@@ -45,6 +45,7 @@
   bool showFileHeaders;
   bool showSectionHeaders;
   bool verbose;
+  bool cfg;
   OutputFormat outputFormat;
   const char* outputFileName;
   const char* tempFileName;
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 9be0922..2466f33 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -46,6 +46,7 @@
   fprintf(stderr, " -c : verify checksum and exit\n");
   fprintf(stderr, " -d : disassemble code sections\n");
   fprintf(stderr, " -f : display summary information from file header\n");
+  fprintf(stderr, " -g : dump CFG for dex\n");
   fprintf(stderr, " -h : display file header details\n");
   fprintf(stderr, " -i : ignore checksum failures\n");
   fprintf(stderr, " -l : output layout, either 'plain' or 'xml'\n");
@@ -68,7 +69,7 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "cdfhil:t:o:");
+    const int ic = getopt(argc, argv, "cdfghil:t:o:");
     if (ic < 0) {
       break;  // done
     }
@@ -82,6 +83,9 @@
       case 'f':  // dump outer file header
         gOptions.showFileHeaders = true;
         break;
+      case 'g':  // dump cfg
+        gOptions.cfg = true;
+        break;
       case 'h':  // dump section headers, i.e. all meta-data
         gOptions.showSectionHeaders = true;
         break;
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 44787a7..d4574f4 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -928,6 +928,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xBD:
+        opcode1 = "bsr";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBE:
         opcode1 = "movsxb";
         has_modrm = true;
@@ -1117,6 +1122,9 @@
       opcode1 = opcode_tmp.c_str();
     }
     break;
+  case 0xA5:
+    opcode1 = (prefix[2] == 0x66 ? "movsw" : "movsl");
+    break;
   case 0xA7:
     opcode1 = (prefix[2] == 0x66 ? "cmpsw" : "cmpsl");
     break;
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index c11c134..fc2a801 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -614,7 +614,9 @@
   for (size_t i = 0; i < count; ++i) {
     auto* root = roots[i];
     auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root);
-    MarkObject(&ref);
+    // The root can be in the to-space since we may visit the declaring class of an ArtMethod
+    // multiple times if it is on the call stack.
+    MarkObjectIfNotInToSpace(&ref);
     if (*root != ref.AsMirrorPtr()) {
       *root = ref.AsMirrorPtr();
     }
@@ -624,7 +626,7 @@
 void SemiSpace::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
                            const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
-    MarkObject(roots[i]);
+    MarkObjectIfNotInToSpace(roots[i]);
   }
 }
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 20512f9..db3f2fe 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -30,6 +30,7 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
+#include "dex_instruction.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
@@ -1452,4 +1453,372 @@
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
 
+static void DumpMethodCFGImpl(const DexFile* dex_file,
+                              uint32_t dex_method_idx,
+                              const DexFile::CodeItem* code_item,
+                              std::ostream& os) {
+  os << "digraph {\n";
+  os << "  # /* " << PrettyMethod(dex_method_idx, *dex_file, true) << " */\n";
+
+  std::set<uint32_t> dex_pc_is_branch_target;
+  {
+    // Go and populate.
+    const Instruction* inst = Instruction::At(code_item->insns_);
+    for (uint32_t dex_pc = 0;
+         dex_pc < code_item->insns_size_in_code_units_;
+         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
+      if (inst->IsBranch()) {
+        dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
+      } else if (inst->IsSwitch()) {
+        const uint16_t* insns = code_item->insns_ + dex_pc;
+        int32_t switch_offset = insns[1] | ((int32_t) insns[2]) << 16;
+        const uint16_t* switch_insns = insns + switch_offset;
+        uint32_t switch_count = switch_insns[1];
+        int32_t targets_offset;
+        if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
+          /* 0=sig, 1=count, 2/3=firstKey */
+          targets_offset = 4;
+        } else {
+          /* 0=sig, 1=count, 2..count*2 = keys */
+          targets_offset = 2 + 2 * switch_count;
+        }
+        for (uint32_t targ = 0; targ < switch_count; targ++) {
+          int32_t offset = (int32_t) switch_insns[targets_offset + targ * 2] |
+              (int32_t) (switch_insns[targets_offset + targ * 2 + 1] << 16);
+          dex_pc_is_branch_target.insert(dex_pc + offset);
+        }
+      }
+    }
+  }
+
+  // Create nodes for "basic blocks."
+  std::map<uint32_t, uint32_t> dex_pc_to_node_id;  // This only has entries for block starts.
+  std::map<uint32_t, uint32_t> dex_pc_to_incl_id;  // This has entries for all dex pcs.
+
+  {
+    const Instruction* inst = Instruction::At(code_item->insns_);
+    bool first_in_block = true;
+    bool force_new_block = false;
+    for (uint32_t dex_pc = 0; dex_pc < code_item->insns_size_in_code_units_;
+        dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
+      if (dex_pc == 0 ||
+          (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
+          force_new_block) {
+        uint32_t id = dex_pc_to_node_id.size();
+        if (id > 0) {
+          // End last node.
+          os << "}\"];\n";
+        }
+        // Start next node.
+        os << "  node" << id << " [shape=record,label=\"{";
+        dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
+        first_in_block = true;
+        force_new_block = false;
+      }
+
+      // Register instruction.
+      dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
+
+      // Print instruction.
+      if (!first_in_block) {
+        os << " | ";
+      } else {
+        first_in_block = false;
+      }
+
+      // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
+      os << "<" << "p" << dex_pc << ">";
+      os << " 0x" << std::hex << dex_pc << std::dec << ": ";
+      std::string inst_str = inst->DumpString(dex_file);
+      size_t cur_start = 0;  // It's OK to start at zero, instruction dumps don't start with chars
+      // we need to escape.
+      while (cur_start != std::string::npos) {
+        size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
+        if (next_escape == std::string::npos) {
+          os << inst_str.substr(cur_start, inst_str.size() - cur_start);
+          break;
+        } else {
+          os << inst_str.substr(cur_start, next_escape - cur_start);
+          // Escape all necessary characters.
+          while (next_escape < inst_str.size()) {
+            char c = inst_str.at(next_escape);
+            if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
+              os << '\\' << c;
+            } else {
+              break;
+            }
+            next_escape++;
+          }
+          if (next_escape >= inst_str.size()) {
+            next_escape = std::string::npos;
+          }
+          cur_start = next_escape;
+        }
+      }
+
+      // Force a new block for some fall-throughs and some instructions that terminate the "local"
+      // control flow.
+      force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
+    }
+    // Close last node.
+    if (dex_pc_to_node_id.size() > 0) {
+      os << "}\"];\n";
+    }
+  }
+
+  // Create edges between them.
+  {
+    std::ostringstream regular_edges;
+    std::ostringstream taken_edges;
+    std::ostringstream exception_edges;
+
+    // Common set of exception edges.
+    std::set<uint32_t> exception_targets;
+
+    // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
+    // pass. In the first pass we try and see whether we can use a common set of edges.
+    std::set<uint32_t> blocks_with_detailed_exceptions;
+
+    {
+      uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
+      uint32_t old_dex_pc = 0;
+      uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
+      const Instruction* inst = Instruction::At(code_item->insns_);
+      for (uint32_t dex_pc = 0;
+          dex_pc < code_item->insns_size_in_code_units_;
+          old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
+        {
+          auto it = dex_pc_to_node_id.find(dex_pc);
+          if (it != dex_pc_to_node_id.end()) {
+            if (!exception_targets.empty()) {
+              // It seems the last block had common exception handlers. Add the exception edges now.
+              uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
+              for (uint32_t handler_pc : exception_targets) {
+                auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
+                if (node_id_it != dex_pc_to_incl_id.end()) {
+                  exception_edges << "  node" << node_id
+                      << " -> node" << node_id_it->second << ":p" << handler_pc
+                      << ";\n";
+                }
+              }
+              exception_targets.clear();
+            }
+
+            block_start_dex_pc = dex_pc;
+
+            // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
+            // like switch data.
+            uint32_t old_last = last_node_id;
+            last_node_id = it->second;
+            if (old_last != std::numeric_limits<uint32_t>::max()) {
+              regular_edges << "  node" << old_last << ":p" << old_dex_pc
+                  << " -> node" << last_node_id << ":p" << dex_pc
+                  << ";\n";
+            }
+          }
+
+          // Look at the exceptions of the first entry.
+          CatchHandlerIterator catch_it(*code_item, dex_pc);
+          for (; catch_it.HasNext(); catch_it.Next()) {
+            exception_targets.insert(catch_it.GetHandlerAddress());
+          }
+        }
+
+        // Handle instruction.
+
+        // Branch: something with at most two targets.
+        if (inst->IsBranch()) {
+          const int32_t offset = inst->GetTargetOffset();
+          const bool conditional = !inst->IsUnconditional();
+
+          auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
+          if (target_it != dex_pc_to_node_id.end()) {
+            taken_edges << "  node" << last_node_id << ":p" << dex_pc
+                << " -> node" << target_it->second << ":p" << (dex_pc + offset)
+                << ";\n";
+          }
+          if (!conditional) {
+            // No fall-through.
+            last_node_id = std::numeric_limits<uint32_t>::max();
+          }
+        } else if (inst->IsSwitch()) {
+          // TODO: Iterate through all switch targets.
+          const uint16_t* insns = code_item->insns_ + dex_pc;
+          /* make sure the start of the switch is in range */
+          int32_t switch_offset = insns[1] | ((int32_t) insns[2]) << 16;
+          /* offset to switch table is a relative branch-style offset */
+          const uint16_t* switch_insns = insns + switch_offset;
+          uint32_t switch_count = switch_insns[1];
+          int32_t targets_offset;
+          if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
+            /* 0=sig, 1=count, 2/3=firstKey */
+            targets_offset = 4;
+          } else {
+            /* 0=sig, 1=count, 2..count*2 = keys */
+            targets_offset = 2 + 2 * switch_count;
+          }
+          /* make sure the end of the switch is in range */
+          /* verify each switch target */
+          for (uint32_t targ = 0; targ < switch_count; targ++) {
+            int32_t offset = (int32_t) switch_insns[targets_offset + targ * 2] |
+                (int32_t) (switch_insns[targets_offset + targ * 2 + 1] << 16);
+            int32_t abs_offset = dex_pc + offset;
+            auto target_it = dex_pc_to_node_id.find(abs_offset);
+            if (target_it != dex_pc_to_node_id.end()) {
+              // TODO: value label.
+              taken_edges << "  node" << last_node_id << ":p" << dex_pc
+                  << " -> node" << target_it->second << ":p" << (abs_offset)
+                  << ";\n";
+            }
+          }
+        }
+
+        // Exception edges. If this is not the first instruction in the block
+        if (block_start_dex_pc != dex_pc) {
+          std::set<uint32_t> current_handler_pcs;
+          CatchHandlerIterator catch_it(*code_item, dex_pc);
+          for (; catch_it.HasNext(); catch_it.Next()) {
+            current_handler_pcs.insert(catch_it.GetHandlerAddress());
+          }
+          if (current_handler_pcs != exception_targets) {
+            exception_targets.clear();  // Clear so we don't do something at the end.
+            blocks_with_detailed_exceptions.insert(block_start_dex_pc);
+          }
+        }
+
+        if (inst->IsReturn() ||
+            (inst->Opcode() == Instruction::THROW) ||
+            (inst->IsBranch() && inst->IsUnconditional())) {
+          // No fall-through.
+          last_node_id = std::numeric_limits<uint32_t>::max();
+        }
+      }
+      // Finish up the last block, if it had common exceptions.
+      if (!exception_targets.empty()) {
+        // It seems the last block had common exception handlers. Add the exception edges now.
+        uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
+        for (uint32_t handler_pc : exception_targets) {
+          auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
+          if (node_id_it != dex_pc_to_incl_id.end()) {
+            exception_edges << "  node" << node_id
+                << " -> node" << node_id_it->second << ":p" << handler_pc
+                << ";\n";
+          }
+        }
+        exception_targets.clear();
+      }
+    }
+
+    // Second pass for detailed exception blocks.
+    // TODO
+    // Exception edges. If this is not the first instruction in the block
+    for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
+      const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
+      uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
+      for (;;) {
+        CatchHandlerIterator catch_it(*code_item, dex_pc);
+        if (catch_it.HasNext()) {
+          std::set<uint32_t> handled_targets;
+          for (; catch_it.HasNext(); catch_it.Next()) {
+            uint32_t handler_pc = catch_it.GetHandlerAddress();
+            auto it = handled_targets.find(handler_pc);
+            if (it == handled_targets.end()) {
+              auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
+              if (node_id_it != dex_pc_to_incl_id.end()) {
+                exception_edges << "  node" << this_node_id << ":p" << dex_pc
+                    << " -> node" << node_id_it->second << ":p" << handler_pc
+                    << ";\n";
+              }
+
+              // Mark as done.
+              handled_targets.insert(handler_pc);
+            }
+          }
+        }
+        if (inst->IsBasicBlockEnd()) {
+          break;
+        }
+
+        // Loop update. In the body to have a late break-out if the next instruction is a branch
+        // target and thus in another block.
+        dex_pc += inst->SizeInCodeUnits();
+        if (dex_pc >= code_item->insns_size_in_code_units_) {
+          break;
+        }
+        if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
+          break;
+        }
+        inst = inst->Next();
+      }
+    }
+
+    // Write out the sub-graphs to make edges styled.
+    os << "\n";
+    os << "  subgraph regular_edges {\n";
+    os << "    edge [color=\"#000000\",weight=.3,len=3];\n\n";
+    os << "    " << regular_edges.str() << "\n";
+    os << "  }\n\n";
+
+    os << "  subgraph taken_edges {\n";
+    os << "    edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
+    os << "    " << taken_edges.str() << "\n";
+    os << "  }\n\n";
+
+    os << "  subgraph exception_edges {\n";
+    os << "    edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
+    os << "    " << exception_edges.str() << "\n";
+    os << "  }\n\n";
+  }
+
+  os << "}\n";
+}
+
+void DumpMethodCFG(ArtMethod* method, std::ostream& os) {
+  const DexFile* dex_file = method->GetDexFile();
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+
+  DumpMethodCFGImpl(dex_file, method->GetDexMethodIndex(), code_item, os);
+}
+
+void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
+  // This is painful, we need to find the code item. That means finding the class, and then
+  // iterating the table.
+  if (dex_method_idx >= dex_file->NumMethodIds()) {
+    os << "Could not find method-idx.";
+    return;
+  }
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
+
+  const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
+  if (class_def == nullptr) {
+    os << "Could not find class-def.";
+    return;
+  }
+
+  const uint8_t* class_data = dex_file->GetClassData(*class_def);
+  if (class_data == nullptr) {
+    os << "No class data.";
+    return;
+  }
+
+  ClassDataItemIterator it(*dex_file, class_data);
+  // Skip fields
+  while (it.HasNextStaticField() || it.HasNextInstanceField()) {
+    it.Next();
+  }
+
+  // Find method, and dump it.
+  while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
+    uint32_t method_idx = it.GetMemberIndex();
+    if (method_idx == dex_method_idx) {
+      DumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
+      return;
+    }
+    it.Next();
+  }
+
+  // Otherwise complain.
+  os << "Something went wrong, didn't find the method in the class data.";
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index 4fa5f5a..d1be51a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -324,6 +324,9 @@
   return pointer_size == 4 || pointer_size == 8;
 }
 
+void DumpMethodCFG(ArtMethod* method, std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_);
+void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1661534..7c6add1 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -53,6 +53,9 @@
 static constexpr bool gDebugVerify = false;
 // TODO: Add a constant to method_verifier to turn on verbose logging?
 
+// On VLOG(verifier), should we dump the whole state when we run into a hard failure?
+static constexpr bool kDumpRegLinesOnHardFailureIfVLOG = true;
+
 void PcToRegisterLineTable::Init(RegisterTrackingMode mode, InstructionFlags* flags,
                                  uint32_t insns_size, uint16_t registers_size,
                                  MethodVerifier* verifier) {
@@ -638,6 +641,12 @@
         Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
       }
       have_pending_hard_failure_ = true;
+      if (VLOG_IS_ON(verifier) && kDumpRegLinesOnHardFailureIfVLOG) {
+        ScopedObjectAccess soa(Thread::Current());
+        std::ostringstream oss;
+        Dump(oss);
+        LOG(ERROR) << oss.str();
+      }
       break;
     }
   }
@@ -1319,7 +1328,7 @@
   ScopedIndentation indent1(vios);
   const Instruction* inst = Instruction::At(code_item_->insns_);
   for (size_t dex_pc = 0; dex_pc < code_item_->insns_size_in_code_units_;
-      dex_pc += inst->SizeInCodeUnits()) {
+      dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
     RegisterLine* reg_line = reg_table_.GetLine(dex_pc);
     if (reg_line != nullptr) {
       vios->Stream() << reg_line->Dump(this) << "\n";
@@ -1331,7 +1340,6 @@
       vios->Stream() << inst->DumpHex(5) << " ";
     }
     vios->Stream() << inst->DumpString(dex_file_) << "\n";
-    inst = inst->Next();
   }
 }
 
@@ -3938,7 +3946,24 @@
     if (array_type.IsZero()) {
       // Null array type; this code path will fail at runtime.
       // Still check that the given value matches the instruction's type.
-      work_line_->VerifyRegisterType(this, inst->VRegA_23x(), insn_type);
+      // Note: this is, as usual, complicated by the fact the the instruction isn't fully typed
+      //       and fits multiple register types.
+      const RegType* modified_reg_type = &insn_type;
+      if ((modified_reg_type == &reg_types_.Integer()) ||
+          (modified_reg_type == &reg_types_.LongLo())) {
+        // May be integer or float | long or double. Overwrite insn_type accordingly.
+        const RegType& value_type = work_line_->GetRegisterType(this, inst->VRegA_23x());
+        if (modified_reg_type == &reg_types_.Integer()) {
+          if (&value_type == &reg_types_.Float()) {
+            modified_reg_type = &value_type;
+          }
+        } else {
+          if (&value_type == &reg_types_.DoubleLo()) {
+            modified_reg_type = &value_type;
+          }
+        }
+      }
+      work_line_->VerifyRegisterType(this, inst->VRegA_23x(), *modified_reg_type);
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
     } else {
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index e2101a6..dd37cdb 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -39,4 +39,6 @@
 b/22777307
 b/22881413
 b/20843113
+b/23201502 (float)
+b/23201502 (double)
 Done!
diff --git a/test/800-smali/smali/b_23201502.smali b/test/800-smali/smali/b_23201502.smali
new file mode 100644
index 0000000..d958938
--- /dev/null
+++ b/test/800-smali/smali/b_23201502.smali
@@ -0,0 +1,23 @@
+.class public LB23201502;
+
+.super Ljava/lang/Object;
+
+.method public static runFloat()V
+   .registers 3
+   const v0, 0             # Null array.
+   const v1, 0             # 0 index into array.
+   const v2, 0             # 0 value, will be turned into float.
+   int-to-float v2, v2     # Definitely make v2 float.
+   aput v2 , v0, v1        # Put into null array.
+   return-void
+.end method
+
+.method public static runDouble()V
+   .registers 4
+   const v0, 0             # Null array.
+   const v1, 0             # 0 index into array.
+   const v2, 0             # 0 value, will be turned into double.
+   int-to-double v2, v2    # Definitely make v2+v3 double.
+   aput-wide v2 , v0, v1   # Put into null array.
+   return-void
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 3c88040..b481a1d 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -123,6 +123,10 @@
                 null));
         testCases.add(new TestCase("b/22881413", "B22881413", "run", null, null, null));
         testCases.add(new TestCase("b/20843113", "B20843113", "run", null, null, null));
+        testCases.add(new TestCase("b/23201502 (float)", "B23201502", "runFloat", null,
+                new NullPointerException(), null));
+        testCases.add(new TestCase("b/23201502 (double)", "B23201502", "runDouble", null,
+                new NullPointerException(), null));
     }
 
     public void runTests() {