Merge "Fix JNI ToReflectedMethod for constructor"
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 21a8931..542e888 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -21,11 +21,13 @@
 
 # List of known broken tests that we won't attempt to execute. The test name must be the full
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
-ART_TEST_KNOWN_BROKEN :=
+ART_TEST_KNOWN_BROKEN := \
+  test-art-host-oat-optimizing-SignalTest64 \
+  test-art-host-oat-optimizing-SignalTest32
 
-# List of known failing tests that when executed won't cause test execution to finish. The test name
-# must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
-ART_TEST_KNOWN_FAILING := $(ART_TEST_KNOWN_BROKEN)
+# List of known failing tests that, when executed, still allow test execution to finish.
+# The test name must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
+ART_TEST_KNOWN_FAILING :=
 
 # Keep going after encountering a test failure?
 ART_TEST_KEEP_GOING ?= false
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3117f71..916fd58 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -26,7 +26,7 @@
 # Use dex2oat debug version for better error reporting
 # $(1): 2ND_ or undefined, 2ND_ for 32-bit host builds.
 define create-core-oat-host-rules
-$$($(1)HOST_CORE_IMG_OUT): $$($(1)HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
+$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "host dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $$(DEX2OATD) --runtime-arg -Xms16m --runtime-arg -Xmx16m \
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c5862da..ec3c815 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -707,7 +707,8 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister();
-  size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
       invoke->GetIndexInDexCache() * kArmWordSize;
 
   // TODO: Implement all kinds of calls:
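
Note on the dex cache offset fix above: the dex cache arrays store compressed 32-bit HeapReference<Object> elements, so the element size handed to mirror::Array::DataOffset() must be sizeof(mirror::HeapReference<mirror::Object>) rather than sizeof(mirror::Object*), which is 8 bytes on a 64-bit host and would also change the data-start alignment. A minimal standalone sketch of the size difference (plain C++, illustrative constants only, not ART code):

#include <cstdint>
#include <cstdio>

int main() {
  // A raw pointer follows the host word size (8 bytes on a 64-bit host build),
  // while a compressed heap reference stays 4 bytes on every target; DataOffset()
  // also rounds the first-element offset up to the component size, so the wrong
  // size shifts every element in the cache.
  std::printf("pointer=%zu heap_reference=%zu\n", sizeof(void*), sizeof(std::uint32_t));
  return 0;
}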
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 0e2a079..712a24c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -20,7 +20,7 @@
 #include "code_generator.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
-#include "utils/arm/assembler_thumb2.h"
+#include "utils/arm/assembler_arm32.h"
 
 namespace art {
 namespace arm {
@@ -180,7 +180,7 @@
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
-  Thumb2Assembler assembler_;
+  Arm32Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a8ee6c0..f624f3c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -691,7 +691,8 @@
 
 void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
   Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister();
-  size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
       invoke->GetIndexInDexCache() * kX86WordSize;
 
   // TODO: Implement all kinds of calls:
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index fd534ce..7ec0c84 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -52,10 +52,6 @@
   typedef int32_t (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
-#if defined(__arm__)
-  // For thumb we need the bottom bit set.
-  f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
-#endif
   int32_t result = f();
   if (has_result) {
     CHECK_EQ(result, expected);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 56029aa..ccacbef 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -101,6 +101,10 @@
   }
 
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
+  // The optimizing compiler currently does not have a Thumb2 assembler.
+  if (instruction_set == kThumb2) {
+    instruction_set = kArm;
+  }
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
   if (codegen == nullptr) {
     if (shouldCompile) {
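
The hunk above routes a kThumb2 request to the kArm code generator: the optimizing backend is wired to the A32 Arm32Assembler for now (see the code_generator_arm.h change earlier in this patch), so Thumb2 requests are normalized before CodeGenerator::Create is consulted. A rough, hypothetical sketch of that normalization step in isolation:

// Illustration only; names shadow the ART enums but this is not ART code.
enum InstructionSet { kNone, kArm, kThumb2, kX86 };

InstructionSet NormalizeForOptimizing(InstructionSet isa) {
  // Fall back to A32 until a Thumb2-capable code generator exists.
  return (isa == kThumb2) ? kArm : isa;
}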
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index b607a1d..8a34928 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -111,43 +111,38 @@
   }
 }
 
-uint32_t ShifterOperand::encodingThumb(int version) const {
-  CHECK(version == 1 || version == 2);
-  if (version == 1) {
-    LOG(FATAL) << "Invalid of use encodingThumb with version 1";
-  } else {
-    switch (type_) {
-      case kImmediate:
-        return immed_;
-      case kRegister:
-        if (is_shift_) {
-          // Shifted immediate or register.
-          if (rs_ == kNoRegister) {
-            // Immediate shift.
-            if (shift_ == RRX) {
-              // RRX is encoded as an ROR with imm 0.
-              return ROR << 4 | static_cast<uint32_t>(rm_);
-            } else {
-              uint32_t imm3 = immed_ >> 2;
-              uint32_t imm2 = immed_ & 0b11;
-
-              return imm3 << 12 | imm2 << 6 | shift_ << 4 |
-                  static_cast<uint32_t>(rm_);
-            }
+uint32_t ShifterOperand::encodingThumb() const {
+  switch (type_) {
+    case kImmediate:
+      return immed_;
+    case kRegister:
+      if (is_shift_) {
+        // Shifted immediate or register.
+        if (rs_ == kNoRegister) {
+          // Immediate shift.
+          if (shift_ == RRX) {
+            // RRX is encoded as an ROR with imm 0.
+            return ROR << 4 | static_cast<uint32_t>(rm_);
           } else {
-            LOG(FATAL) << "No register-shifted register instruction available in thumb";
-            return 0;
+            uint32_t imm3 = immed_ >> 2;
+            uint32_t imm2 = immed_ & 0b11;
+
+            return imm3 << 12 | imm2 << 6 | shift_ << 4 |
+                static_cast<uint32_t>(rm_);
           }
         } else {
-          // Simple register
-          return static_cast<uint32_t>(rm_);
+          LOG(FATAL) << "No register-shifted register instruction available in thumb";
+          return 0;
         }
-        break;
-      default:
-        // Can't get here.
-        LOG(FATAL) << "Invalid shifter operand for thumb";
-        return 0;
-    }
+      } else {
+        // Simple register
+        return static_cast<uint32_t>(rm_);
+      }
+      break;
+    default:
+      // Can't get here.
+      LOG(FATAL) << "Invalid shifter operand for thumb";
+      return 0;
   }
   return 0;
 }
@@ -187,51 +182,78 @@
 uint32_t Address::encodingArm() const {
   CHECK(IsAbsoluteUint(12, offset_));
   uint32_t encoding;
-  if (offset_ < 0) {
-    encoding = (am_ ^ (1 << kUShift)) | -offset_;  // Flip U to adjust sign.
+  if (is_immed_offset_) {
+    if (offset_ < 0) {
+      encoding = (am_ ^ (1 << kUShift)) | -offset_;  // Flip U to adjust sign.
+    } else {
+      encoding =  am_ | offset_;
+    }
   } else {
-    encoding =  am_ | offset_;
+    uint32_t imm5 = offset_;
+    uint32_t shift = shift_;
+    if (shift == RRX) {
+      imm5 = 0;
+      shift = ROR;
+    }
+    encoding = am_ | static_cast<uint32_t>(rm_) | shift << 5 | imm5 << 7 | B25;
   }
   encoding |= static_cast<uint32_t>(rn_) << kRnShift;
   return encoding;
 }
 
 
-uint32_t Address::encodingThumb(int version) const {
-  CHECK(version == 1 || version == 2);
+uint32_t Address::encodingThumb(bool is_32bit) const {
   uint32_t encoding = 0;
-  if (version == 2) {
-      encoding = static_cast<uint32_t>(rn_) << 16;
-      // Check for the T3/T4 encoding.
-      // PUW must Offset for T3
-      // Convert ARM PU0W to PUW
-      // The Mode is in ARM encoding format which is:
-      // |P|U|0|W|
-      // we need this in thumb2 mode:
-      // |P|U|W|
+  if (is_immed_offset_) {
+    encoding = static_cast<uint32_t>(rn_) << 16;
+    // Check for the T3/T4 encoding.
+    // PUW must be Offset for T3.
+    // Convert ARM PU0W to PUW
+    // The Mode is in ARM encoding format which is:
+    // |P|U|0|W|
+    // we need this in thumb2 mode:
+    // |P|U|W|
 
-      uint32_t am = am_;
-      int32_t offset = offset_;
-      if (offset < 0) {
-        am ^= 1 << kUShift;
-        offset = -offset;
-      }
-      if (offset_ < 0 || (offset >= 0 && offset < 256 &&
+    uint32_t am = am_;
+    int32_t offset = offset_;
+    if (offset < 0) {
+      am ^= 1 << kUShift;
+      offset = -offset;
+    }
+    if (offset_ < 0 || (offset >= 0 && offset < 256 &&
         am_ != Mode::Offset)) {
-          // T4 encoding.
-        uint32_t PUW = am >> 21;   // Move down to bottom of word.
-        PUW = (PUW >> 1) | (PUW & 1);   // Bits 3, 2 and 0.
-        // If P is 0 then W must be 1 (Different from ARM).
-        if ((PUW & 0b100) == 0) {
-          PUW |= 0b1;
-        }
-        encoding |= B11 | PUW << 8 | offset;
-      } else {
-        // T3 encoding (also sets op1 to 0b01).
-        encoding |= B23 | offset_;
+      // T4 encoding.
+      uint32_t PUW = am >> 21;   // Move down to bottom of word.
+      PUW = (PUW >> 1) | (PUW & 1);   // Bits 3, 2 and 0.
+      // If P is 0 then W must be 1 (Different from ARM).
+      if ((PUW & 0b100) == 0) {
+        PUW |= 0b1;
       }
+      encoding |= B11 | PUW << 8 | offset;
+    } else {
+      // T3 encoding (also sets op1 to 0b01).
+      encoding |= B23 | offset_;
+    }
   } else {
-    LOG(FATAL) << "Invalid use of encodingThumb for version 1";
+    // Register offset, possibly shifted.
+    // Need to choose between encoding T1 (16 bit) or T2.
+    // Only Offset mode is supported.  Shift must be LSL and the count
+    // is only 2 bits.
+    CHECK_EQ(shift_, LSL);
+    CHECK_LE(offset_, 3);
+    CHECK_EQ(am_, Offset);
+    bool is_t2 = is_32bit;
+    if (ArmAssembler::IsHighRegister(rn_) || ArmAssembler::IsHighRegister(rm_)) {
+      is_t2 = true;
+    } else if (offset_ != 0) {
+      is_t2 = true;
+    }
+    if (is_t2) {
+      encoding = static_cast<uint32_t>(rn_) << 16 | static_cast<uint32_t>(rm_) |
+          offset_ << 4;
+    } else {
+      encoding = static_cast<uint32_t>(rn_) << 3 | static_cast<uint32_t>(rm_) << 6;
+    }
   }
   return encoding;
 }
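
The PU0W-to-PUW conversion in the T4 path above is easy to sanity-check: ARM keeps the addressing mode as |P|U|0|W| in bits 24..21, while the Thumb2 T4 encoding wants the packed |P|U|W| triple, with W forced to 1 whenever P is 0. A small self-contained check of that bit manipulation (a sketch mirroring the code above, not ART code):

#include <cassert>
#include <cstdint>

// Take ARM's P U 0 W nibble and pack it into Thumb2's P U W.
uint32_t ArmPU0WToThumbPUW(uint32_t pu0w) {
  uint32_t puw = (pu0w >> 1) | (pu0w & 1);   // Keep bits 3, 2 and 0.
  if ((puw & 0b100) == 0) {                  // If P is 0 then W must be 1.
    puw |= 0b1;
  }
  return puw;
}

int main() {
  assert(ArmPU0WToThumbPUW(0b1101) == 0b111);  // PreIndex:  P=1 U=1 W=1.
  assert(ArmPU0WToThumbPUW(0b1100) == 0b110);  // Offset:    P=1 U=1 W=0.
  assert(ArmPU0WToThumbPUW(0b0100) == 0b011);  // PostIndex: P=0 U=1, W forced to 1.
  return 0;
}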
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 7b662e1..be19174 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -68,7 +68,7 @@
   }
 
   uint32_t encodingArm() const;
-  uint32_t encodingThumb(int version) const;
+  uint32_t encodingThumb() const;
 
   bool IsEmpty() const {
     return type_ == kUnknown;
@@ -196,8 +196,26 @@
     NegPostIndex = (0|0|0) << 21   // negative post-indexed with writeback
   };
 
-  explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), offset_(offset),
-      am_(am) {
+  Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), rm_(R0),
+      offset_(offset),
+      am_(am), is_immed_offset_(true), shift_(LSL) {
+  }
+
+  Address(Register rn, Register rm, Mode am = Offset) : rn_(rn), rm_(rm), offset_(0),
+      am_(am), is_immed_offset_(false), shift_(LSL) {
+    CHECK_NE(rm, PC);
+  }
+
+  Address(Register rn, Register rm, Shift shift, uint32_t count, Mode am = Offset) :
+                       rn_(rn), rm_(rm), offset_(count),
+                       am_(am), is_immed_offset_(false), shift_(shift) {
+    CHECK_NE(rm, PC);
+  }
+
+  // LDR(literal) - pc relative load.
+  explicit Address(int32_t offset) :
+               rn_(PC), rm_(R0), offset_(offset),
+               am_(Offset), is_immed_offset_(false), shift_(LSL) {
   }
 
   static bool CanHoldLoadOffsetArm(LoadOperandType type, int offset);
@@ -207,7 +225,7 @@
   static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset);
 
   uint32_t encodingArm() const;
-  uint32_t encodingThumb(int version) const;
+  uint32_t encodingThumb(bool is_32bit) const;
 
   uint32_t encoding3() const;
   uint32_t vencoding() const;
@@ -218,6 +236,10 @@
     return rn_;
   }
 
+  Register GetRegisterOffset() const {
+    return rm_;
+  }
+
   int32_t GetOffset() const {
     return offset_;
   }
@@ -226,10 +248,26 @@
     return am_;
   }
 
+  bool IsImmediate() const {
+    return is_immed_offset_;
+  }
+
+  Shift GetShift() const {
+    return shift_;
+  }
+
+  int32_t GetShiftCount() const {
+    CHECK(!is_immed_offset_);
+    return offset_;
+  }
+
  private:
   Register rn_;
-  int32_t offset_;
+  Register rm_;
+  int32_t offset_;      // Used as shift amount for register offset.
   Mode am_;
+  bool is_immed_offset_;
+  Shift shift_;
 };
 
 // Instruction encoding bits.
@@ -544,11 +582,25 @@
 
   // Convenience shift instructions. Use mov instruction with shifter operand
   // for variants setting the status flags or using a register shift count.
-  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Rrx(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Rrx(Register rd, Register rm, bool setcc = false,
+                   Condition cond = AL) = 0;
+
+  virtual void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Asr(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Ror(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
 
   static bool IsInstructionForExceptionHandling(uword pc);
 
@@ -673,6 +725,14 @@
 
   static uint32_t ModifiedImmediate(uint32_t value);
 
+  static bool IsLowRegister(Register r) {
+    return r < R8;
+  }
+
+  static bool IsHighRegister(Register r) {
+     return r >= R8;
+  }
+
  protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
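
For orientation, the new Address constructors declared above give four addressing forms. A usage sketch, assuming the ART assembler headers and an arm::Thumb2Assembler named "assembler" as in the tests added later in this patch (not standalone-compilable outside ART):

assembler.ldr(R0, Address(R1, 8));           // [r1, #8]          immediate offset
assembler.ldr(R0, Address(R1, R2));          // [r1, r2]          register offset
assembler.ldr(R0, Address(R1, R2, LSL, 3));  // [r1, r2, lsl #3]  shifted register offset
assembler.ldr(R0, Address(16));              // [pc, #16]         PC-relative literal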
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index b2bb20f..267bba8 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -541,20 +541,40 @@
 
 
 void Arm32Assembler::EmitMemOp(Condition cond,
-                             bool load,
-                             bool byte,
-                             Register rd,
-                             const Address& ad) {
+                               bool load,
+                               bool byte,
+                               Register rd,
+                               const Address& ad) {
   CHECK_NE(rd, kNoRegister);
   CHECK_NE(cond, kNoCondition);
   const Address& addr = static_cast<const Address&>(ad);
 
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B26 |
-                     (load ? L : 0) |
-                     (byte ? B : 0) |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     addr.encodingArm();
+  int32_t encoding = 0;
+  if (!ad.IsImmediate() && ad.GetRegisterOffset() == PC) {
+    // PC relative LDR(literal)
+    int32_t offset = ad.GetOffset();
+    int32_t u = B23;
+    if (offset < 0) {
+      offset = -offset;
+      u = 0;
+    }
+    CHECK_LT(offset, (1 << 12));
+    encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+         B26 | B24 | u | B20 |
+         (load ? L : 0) |
+         (byte ? B : 0) |
+         (static_cast<int32_t>(rd) << kRdShift) |
+         0xf << 16 |
+         (offset & 0xfff);
+
+  } else {
+    encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+        B26 |
+        (load ? L : 0) |
+        (byte ? B : 0) |
+        (static_cast<int32_t>(rd) << kRdShift) |
+        addr.encodingArm();
+  }
   Emit(encoding);
 }
 
@@ -1020,39 +1040,98 @@
 
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
-  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  }
 }
 
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  }
 }
 
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  }
 }
 
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
-  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  }
 }
 
-void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond) {
-  mov(rd, ShifterOperand(rm, ROR, 0), cond);
+void Arm32Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ROR, 0), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ROR, 0), cond);
+  }
 }
 
 
+void Arm32Assembler::Lsl(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSL, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSL, rn), cond);
+  }
+}
+
+
+void Arm32Assembler::Lsr(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSR, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSR, rn), cond);
+  }
+}
+
+
+void Arm32Assembler::Asr(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ASR, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ASR, rn), cond);
+  }
+}
+
+
+void Arm32Assembler::Ror(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ROR, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ROR, rn), cond);
+  }
+}
+
 void Arm32Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
   CHECK_NE(cond, kNoCondition);
   int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
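
With the new setcc parameter, the Arm32 convenience shifts above select between mov and movs. A short usage sketch, again assuming an arm::Arm32Assembler instance named "assembler":

assembler.Lsl(R0, R1, 5);          // mov  r0, r1, lsl #5   (flags untouched)
assembler.Lsl(R0, R1, 5, true);    // movs r0, r1, lsl #5   (updates N, Z and shifter carry)
assembler.Lsr(R0, R1, R2, true);   // movs r0, r1, lsr r2   (register shift count)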
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 7a0fce2..7f9094d 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -197,11 +197,25 @@
   void bl(Label* label, Condition cond = AL);
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Rrx(Register rd, Register rm, Condition cond = AL);
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Rrx(Register rd, Register rm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+
+  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 92a9f53..30aa625 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -329,7 +329,7 @@
       ++reg;
     }
     CHECK_LT(reg, 16);
-    CHECK(am == IA_W);      // Only writeback is supported.
+    CHECK(am == DB_W);      // Only writeback is supported.
     ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond);
   } else {
     EmitMultiMemOp(cond, am, true, base, regs);
@@ -352,8 +352,8 @@
       ++reg;
     }
     CHECK_LT(reg, 16);
-    CHECK(am == DB || am == DB_W);
-    Address::Mode strmode = am == DB_W ? Address::PreIndex : Address::Offset;
+    CHECK(am == IA || am == IA_W);
+    Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset;
     str(static_cast<Register>(reg), Address(base, -kRegisterSize, strmode), cond);
   } else {
     EmitMultiMemOp(cond, am, false, base, regs);
@@ -642,7 +642,6 @@
            if (imm > (1 << 9)) {    // 9 bit immediate.
              return true;
            }
-           return false;      // 16 bit good.
          } else if (opcode == ADD && rd != SP && rn == SP) {   // 10 bit immediate.
            if (imm > (1 << 10)) {
              return true;
@@ -781,7 +780,7 @@
            imm8;
     } else {
       // Modified immediate.
-      uint32_t imm = ModifiedImmediate(so.encodingThumb(2));
+      uint32_t imm = ModifiedImmediate(so.encodingThumb());
       if (imm == kInvalidModifiedImmediate) {
         LOG(FATAL) << "Immediate value cannot fit in thumb2 modified immediate";
       }
@@ -799,7 +798,7 @@
          set_cc << 20 |
          rn << 16 |
          rd << 8 |
-         so.encodingThumb(2);
+         so.encodingThumb();
   }
   Emit32(encoding);
 }
@@ -1081,6 +1080,82 @@
   }
 }
 
+void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc) {
+  CHECK_LT(amount, (1 << 5));
+  if (IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) {
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b00; break;
+      case LSR: opcode = 0b01; break;
+      case ASR: opcode = 0b10; break;
+      case ROR: opcode = 0b11; break;
+      case RRX: opcode = 0b11; amount = 0; break;
+      default:
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    // 32 bit.
+    int32_t encoding = B31 | B30 | B29 | B27 | B25 | B22 |
+        0xf << 16 | (setcc ? B20 : 0);
+    uint32_t imm3 = amount >> 2;
+    uint32_t imm2 = amount & 0b11;
+    encoding |= imm3 << 12 | imm2 << 6 | static_cast<int16_t>(rm) |
+        static_cast<int16_t>(rd) << 8 | opcode << 4;
+    Emit32(encoding);
+  } else {
+    // 16 bit shift
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b00; break;
+      case LSR: opcode = 0b01; break;
+      case ASR: opcode = 0b10; break;
+      default:
+         LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    int16_t encoding = opcode << 11 | amount << 6 | static_cast<int16_t>(rm) << 3 |
+        static_cast<int16_t>(rd);
+    Emit16(encoding);
+  }
+}
+
+void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc) {
+  CHECK_NE(shift, RRX);
+  bool must_be_32bit = false;
+  if (IsHighRegister(rd) || IsHighRegister(rm) || IsHighRegister(rn) || rd != rn) {
+    must_be_32bit = true;
+  }
+
+  if (must_be_32bit) {
+    uint16_t opcode = 0;
+     switch (shift) {
+       case LSL: opcode = 0b00; break;
+       case LSR: opcode = 0b01; break;
+       case ASR: opcode = 0b10; break;
+       case ROR: opcode = 0b11; break;
+       default:
+         LOG(FATAL) << "Unsupported thumb2 shift opcode";
+     }
+     // 32 bit.
+     int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 |
+         0xf << 12 | (setcc ? B20 : 0);
+     encoding |= static_cast<int16_t>(rn) << 16 | static_cast<int16_t>(rm) |
+         static_cast<int16_t>(rd) << 8 | opcode << 21;
+     Emit32(encoding);
+  } else {
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b0010; break;
+      case LSR: opcode = 0b0011; break;
+      case ASR: opcode = 0b0100; break;
+      default:
+         LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    int16_t encoding = B14 | opcode << 6 | static_cast<int16_t>(rm) << 3 |
+        static_cast<int16_t>(rd);
+    Emit16(encoding);
+  }
+}
+
+
 
 void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const {
   bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink;
@@ -1172,7 +1247,7 @@
   }
 
   Register rn = ad.GetRegister();
-  if (IsHighRegister(rn) && rn != SP) {
+  if (IsHighRegister(rn) && rn != SP && rn != PC) {
     must_be_32bit = true;
   }
 
@@ -1180,87 +1255,132 @@
     must_be_32bit = true;
   }
 
-  int32_t offset = ad.GetOffset();
+  if (ad.IsImmediate()) {
+    // Immediate offset
+    int32_t offset = ad.GetOffset();
 
-  // The 16 bit SP relative instruction can only have a 10 bit offset.
-  if (rn == SP && offset > 1024) {
-    must_be_32bit = true;
-  }
-
-  if (byte) {
-    // 5 bit offset, no shift.
-    if (offset > 32) {
+    // The 16 bit SP relative instruction can only have a 10 bit offset.
+    if (rn == SP && offset > 1024) {
       must_be_32bit = true;
     }
-  } else if (half) {
-    // 6 bit offset, shifted by 1.
-    if (offset > 64) {
-      must_be_32bit = true;
-    }
-  } else {
-    // 7 bit offset, shifted by 2.
-    if (offset > 128) {
-       must_be_32bit = true;
-     }
-  }
-
-  if (must_be_32bit) {
-    int32_t encoding = B31 | B30 | B29 | B28 | B27 |
-                  (load ? B20 : 0) |
-                  (is_signed ? B24 : 0) |
-                  static_cast<uint32_t>(rd) << 12 |
-                  ad.encodingThumb(2) |
-                  (byte ? 0 : half ? B21 : B22);
-    Emit32(encoding);
-  } else {
-    // 16 bit thumb1.
-    uint8_t opA = 0;
-    bool sp_relative = false;
 
     if (byte) {
-      opA = 0b0111;
+      // 5 bit offset, no shift.
+      if (offset > 32) {
+        must_be_32bit = true;
+      }
     } else if (half) {
-      opA = 0b1000;
+      // 6 bit offset, shifted by 1.
+      if (offset > 64) {
+        must_be_32bit = true;
+      }
     } else {
-      if (rn == SP) {
-        opA = 0b1001;
-        sp_relative = true;
-      } else {
-        opA = 0b0110;
+      // 7 bit offset, shifted by 2.
+      if (offset > 128) {
+        must_be_32bit = true;
       }
     }
-    int16_t encoding = opA << 12 |
-                (load ? B11 : 0);
 
-    CHECK_GE(offset, 0);
-    if (sp_relative) {
-      // SP relative, 10 bit offset.
-      CHECK_LT(offset, 1024);
-      CHECK_EQ((offset & 0b11), 0);
-      encoding |= rd << 8 | offset >> 2;
+    if (must_be_32bit) {
+      int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+          (load ? B20 : 0) |
+          (is_signed ? B24 : 0) |
+          static_cast<uint32_t>(rd) << 12 |
+          ad.encodingThumb(true) |
+          (byte ? 0 : half ? B21 : B22);
+      Emit32(encoding);
     } else {
-      // No SP relative.  The offset is shifted right depending on
-      // the size of the load/store.
-      encoding |= static_cast<uint32_t>(rd);
+      // 16 bit thumb1.
+      uint8_t opA = 0;
+      bool sp_relative = false;
 
       if (byte) {
-        // 5 bit offset, no shift.
-        CHECK_LT(offset, 32);
+        opA = 0b0111;
       } else if (half) {
-        // 6 bit offset, shifted by 1.
-        CHECK_LT(offset, 64);
-        CHECK_EQ((offset & 0b1), 0);
-        offset >>= 1;
+        opA = 0b1000;
       } else {
-        // 7 bit offset, shifted by 2.
-        CHECK_LT(offset, 128);
-        CHECK_EQ((offset & 0b11), 0);
-        offset >>= 2;
+        if (rn == SP) {
+          opA = 0b1001;
+          sp_relative = true;
+        } else {
+          opA = 0b0110;
+        }
       }
-      encoding |= rn << 3 | offset  << 6;
-    }
+      int16_t encoding = opA << 12 |
+          (load ? B11 : 0);
 
-    Emit16(encoding);
+      CHECK_GE(offset, 0);
+      if (sp_relative) {
+        // SP relative, 10 bit offset.
+        CHECK_LT(offset, 1024);
+        CHECK_EQ((offset & 0b11), 0);
+        encoding |= rd << 8 | offset >> 2;
+      } else {
+        // No SP relative.  The offset is shifted right depending on
+        // the size of the load/store.
+        encoding |= static_cast<uint32_t>(rd);
+
+        if (byte) {
+          // 5 bit offset, no shift.
+          CHECK_LT(offset, 32);
+        } else if (half) {
+          // 6 bit offset, shifted by 1.
+          CHECK_LT(offset, 64);
+          CHECK_EQ((offset & 0b1), 0);
+          offset >>= 1;
+        } else {
+          // 7 bit offset, shifted by 2.
+          CHECK_LT(offset, 128);
+          CHECK_EQ((offset & 0b11), 0);
+          offset >>= 2;
+        }
+        encoding |= rn << 3 | offset  << 6;
+      }
+
+      Emit16(encoding);
+    }
+  } else {
+    // Register offset (possibly shifted) or PC-relative literal.
+    if (ad.GetRegister() == PC) {
+       // PC relative literal encoding.
+      int32_t offset = ad.GetOffset();
+      if (must_be_32bit || offset < 0 || offset > (1 << 10) || !load) {
+        int32_t up = B23;
+        if (offset < 0) {
+          offset = -offset;
+          up = 0;
+        }
+        CHECK_LT(offset, (1 << 12));
+        int32_t encoding = 0x1f << 27 | 0xf << 16 | B22 | (load ? B20 : 0) |
+            offset | up |
+            static_cast<uint32_t>(rd) << 12;
+        Emit32(encoding);
+      } else {
+        // 16 bit literal load.
+        CHECK_GE(offset, 0);
+        CHECK_LT(offset, (1 << 10));
+        int32_t encoding = B14 | (load ? B11 : 0) | static_cast<uint32_t>(rd) << 8 | offset >> 2;
+        Emit16(encoding);
+      }
+    } else {
+      if (ad.GetShiftCount() != 0) {
+        // If there is a shift count this must be 32 bit.
+        must_be_32bit = true;
+      } else if (IsHighRegister(ad.GetRegisterOffset())) {
+        must_be_32bit = true;
+      }
+
+      if (must_be_32bit) {
+        int32_t encoding = 0x1f << 27 | B22 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 |
+            ad.encodingThumb(true);
+        Emit32(encoding);
+      } else {
+        // 16 bit register offset.
+        int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) |
+            ad.encodingThumb(false);
+        Emit16(encoding);
+      }
+    }
   }
 }
 
@@ -2012,37 +2132,70 @@
 
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
-  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSL, shift_imm, setcc);
 }
 
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSR, shift_imm, setcc);
 }
 
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, ASR, shift_imm, setcc);
 }
 
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
-  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, ROR, shift_imm, setcc);
 }
 
 
-void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond) {
-  mov(rd, ShifterOperand(rm, ROR, 0), cond);
+void Thumb2Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, RRX, 0, setcc);
+}
+
+
+void Thumb2Assembler::Lsl(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSL, rn, setcc);
+}
+
+
+void Thumb2Assembler::Lsr(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSR, rn, setcc);
+}
+
+
+void Thumb2Assembler::Asr(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, ASR, rn, setcc);
+}
+
+
+void Thumb2Assembler::Ror(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, ROR, rn, setcc);
 }
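
A quick cross-check of the 32-bit immediate-shift encoding in EmitShift above against the expected disassembly added to assembler_thumb_test_expected.cc.inc: Lsl(R8, R1, 5) must take the 32-bit form because R8 is a high register, and assembling the fields by hand reproduces the listed "ea4f 1841  mov.w r8, r1, lsl #5". Standalone sketch with the B-bit constants written as plain shifts:

#include <cassert>
#include <cstdint>

int main() {
  // mov.w rd, rm, lsl #amount -- mirrors the immediate form of EmitShift.
  const uint32_t rd = 8, rm = 1, amount = 5, opcode = 0b00;  // LSL
  const bool setcc = false;
  uint32_t encoding = (1u << 31) | (1u << 30) | (1u << 29) | (1u << 27) | (1u << 25) |
                      (1u << 22) | (0xfu << 16) | (setcc ? (1u << 20) : 0u);
  uint32_t imm3 = amount >> 2;    // 1
  uint32_t imm2 = amount & 0b11;  // 1
  encoding |= imm3 << 12 | imm2 << 6 | rm | rd << 8 | opcode << 4;
  assert(encoding == 0xea4f1841u);  // Matches the ShiftsResults entry below.
  return 0;
}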
 
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 60b9384..5f24e4e 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -221,11 +221,25 @@
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Rrx(Register rd, Register rm, Condition cond = AL);
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Rrx(Register rd, Register rm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+
+  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
@@ -395,14 +409,8 @@
   static int DecodeBranchOffset(int32_t inst);
   int32_t EncodeTstOffset(int offset, int32_t inst);
   int DecodeTstOffset(int32_t inst);
-
-  bool IsLowRegister(Register r) {
-    return r < R8;
-  }
-
-  bool IsHighRegister(Register r) {
-     return r >= R8;
-  }
+  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc = false);
+  void EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc = false);
 
   bool force_32bit_;      // Force the assembler to use 32 bit thumb2 instructions.
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 55fbed1..1e3e569 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -28,6 +28,15 @@
 #include "assembler_thumb_test_expected.cc.inc"
 
 #ifndef HAVE_ANDROID_OS
+// This controls whether the results are printed to the
+// screen or compared against the expected output.
+// To generate new expected output, set this to true and
+// copy the output into the .cc.inc file in the form
+// of the other results.
+//
+// When this is false, the results are not printed to the
+// output, but are compared against the expected results
+// in the .cc.inc file.
 static constexpr bool kPrintResults = false;
 #endif
 
@@ -38,6 +47,19 @@
   }
 }
 
+int CompareIgnoringSpace(const char* s1, const char* s2) {
+  while (*s1 != '\0') {
+    while (isspace(*s1)) ++s1;
+    while (isspace(*s2)) ++s2;
+    if (*s1 == '\0' || *s1 != *s2) {
+      break;
+    }
+    ++s1;
+    ++s2;
+  }
+  return *s1 - *s2;
+}
+
 std::string GetAndroidToolsDir() {
   std::string root;
   const char* android_build_top = getenv("ANDROID_BUILD_TOP");
@@ -180,7 +202,10 @@
       if (s == nullptr) {
         break;
       }
-      ASSERT_EQ(strcmp(results->second[lineindex], testline), 0);
+      if (CompareIgnoringSpace(results->second[lineindex], testline) != 0) {
+        LOG(FATAL) << "Output is not as expected at line: " << lineindex
+          << results->second[lineindex] << "/" << testline;
+      }
       ++lineindex;
     }
     // Check that we are at the end.
@@ -1222,6 +1247,117 @@
   delete assembler;
 }
 
+TEST(Thumb2AssemblerTest, Shifts) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  // 16 bit
+  __ Lsl(R0, R1, 5);
+  __ Lsr(R0, R1, 5);
+  __ Asr(R0, R1, 5);
+
+  __ Lsl(R0, R0, R1);
+  __ Lsr(R0, R0, R1);
+  __ Asr(R0, R0, R1);
+
+  // 32 bit due to high registers.
+  __ Lsl(R8, R1, 5);
+  __ Lsr(R0, R8, 5);
+  __ Asr(R8, R1, 5);
+  __ Ror(R0, R8, 5);
+
+  // 32 bit due to different Rd and Rn.
+  __ Lsl(R0, R1, R2);
+  __ Lsr(R0, R1, R2);
+  __ Asr(R0, R1, R2);
+  __ Ror(R0, R1, R2);
+
+  // 32 bit due to use of high registers.
+  __ Lsl(R8, R1, R2);
+  __ Lsr(R0, R8, R2);
+  __ Asr(R0, R1, R8);
+
+  // S bit (all 32 bit)
+
+  // 32 bit due to high registers.
+  __ Lsl(R8, R1, 5, true);
+  __ Lsr(R0, R8, 5, true);
+  __ Asr(R8, R1, 5, true);
+  __ Ror(R0, R8, 5, true);
+
+  // 32 bit due to different Rd and Rn.
+  __ Lsl(R0, R1, R2, true);
+  __ Lsr(R0, R1, R2, true);
+  __ Asr(R0, R1, R2, true);
+  __ Ror(R0, R1, R2, true);
+
+  // 32 bit due to use of high registers.
+  __ Lsl(R8, R1, R2, true);
+  __ Lsr(R0, R8, R2, true);
+  __ Asr(R0, R1, R8, true);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Shifts");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LoadStoreRegOffset) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  // 16 bit.
+  __ ldr(R0, Address(R1, R2));
+  __ str(R0, Address(R1, R2));
+
+  // 32 bit due to shift.
+  __ ldr(R0, Address(R1, R2, LSL, 1));
+  __ str(R0, Address(R1, R2, LSL, 1));
+
+  __ ldr(R0, Address(R1, R2, LSL, 3));
+  __ str(R0, Address(R1, R2, LSL, 3));
+
+  // 32 bit due to high register use.
+  __ ldr(R8, Address(R1, R2));
+  __ str(R8, Address(R1, R2));
+
+  __ ldr(R1, Address(R8, R2));
+  __ str(R2, Address(R8, R2));
+
+  __ ldr(R0, Address(R1, R8));
+  __ str(R0, Address(R1, R8));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LoadStoreRegOffset");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LoadStoreLiteral) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ ldr(R0, Address(4));
+  __ str(R0, Address(4));
+
+  __ ldr(R0, Address(-8));
+  __ str(R0, Address(-8));
+
+  // Limits.
+  __ ldr(R0, Address(0x3ff));       // 10 bits (16 bit).
+  __ ldr(R0, Address(0x7ff));       // 11 bits (32 bit).
+  __ str(R0, Address(0x3ff));       // 32 bit (no 16 bit str(literal)).
+  __ str(R0, Address(0x7ff));       // 11 bits (32 bit).
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LoadStoreLiteral");
+  delete assembler;
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
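
The whitespace-insensitive comparison added above is what lets the space-aligned expected strings in the .cc.inc file match objdump's tab-aligned output. A standalone check of the helper's behavior (same logic, trivial driver; assumes ASCII input as in the test data):

#include <cassert>
#include <cctype>

// Same logic as CompareIgnoringSpace above: runs of whitespace compare equal.
int CompareIgnoringSpace(const char* s1, const char* s2) {
  while (*s1 != '\0') {
    while (isspace(*s1)) ++s1;
    while (isspace(*s2)) ++s2;
    if (*s1 == '\0' || *s1 != *s2) {
      break;
    }
    ++s1;
    ++s2;
  }
  return *s1 - *s2;
}

int main() {
  // Tab-formatted objdump line vs. space-formatted expected line.
  assert(CompareIgnoringSpace("   0:\t0148      \tlsls\tr0, r1, #5\n",
                              "0: 0148 lsls r0, r1, #5\n") == 0);
  // A genuine mismatch is still reported.
  assert(CompareIgnoringSpace("lsls r0, r1, #5", "lsrs r0, r1, #5") != 0);
  return 0;
}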
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index c5f2226..c2e7fe8 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -4742,6 +4742,63 @@
   " 80a:	0011      	movs	r1, r2\n",
   nullptr
 };
+const char* ShiftsResults[] = {
+  "   0:	0148      	lsls	r0, r1, #5\n",
+  "   2:	0948      	lsrs	r0, r1, #5\n",
+  "   4:	1148      	asrs	r0, r1, #5\n",
+  "   6:	4088      	lsls	r0, r1\n",
+  "   8:	40c8      	lsrs	r0, r1\n",
+  "   a:	4108      	asrs	r0, r1\n",
+  "   c:	ea4f 1841 	mov.w	r8, r1, lsl #5\n",
+  "  10:	ea4f 1058 	mov.w	r0, r8, lsr #5\n",
+  "  14:	ea4f 1861 	mov.w	r8, r1, asr #5\n",
+  "  18:	ea4f 1078 	mov.w	r0, r8, ror #5\n",
+  "  1c:	fa01 f002 	lsl.w	r0, r1, r2\n",
+  "  20:	fa21 f002 	lsr.w	r0, r1, r2\n",
+  "  24:	fa41 f002 	asr.w	r0, r1, r2\n",
+  "  28:	fa61 f002 	ror.w	r0, r1, r2\n",
+  "  2c:	fa01 f802 	lsl.w	r8, r1, r2\n",
+  "  30:	fa28 f002 	lsr.w	r0, r8, r2\n",
+  "  34:	fa41 f008 	asr.w	r0, r1, r8\n",
+  "  38:	ea5f 1841 	movs.w	r8, r1, lsl #5\n",
+  "  3c:	ea5f 1058 	movs.w	r0, r8, lsr #5\n",
+  "  40:	ea5f 1861 	movs.w	r8, r1, asr #5\n",
+  "  44:	ea5f 1078 	movs.w	r0, r8, ror #5\n",
+  "  48:	fa11 f002 	lsls.w	r0, r1, r2\n",
+  "  4c:	fa31 f002 	lsrs.w	r0, r1, r2\n",
+  "  50:	fa51 f002 	asrs.w	r0, r1, r2\n",
+  "  54:	fa71 f002 	rors.w	r0, r1, r2\n",
+  "  58:	fa11 f802 	lsls.w	r8, r1, r2\n",
+  "  5c:	fa38 f002 	lsrs.w	r0, r8, r2\n",
+  "  60:	fa51 f008 	asrs.w	r0, r1, r8\n",
+  nullptr
+};
+const char* LoadStoreRegOffsetResults[] = {
+  "   0:	5888      	ldr	r0, [r1, r2]\n",
+  "   2:	5088      	str	r0, [r1, r2]\n",
+  "   4:	f851 0012 	ldr.w	r0, [r1, r2, lsl #1]\n",
+  "   8:	f841 0012 	str.w	r0, [r1, r2, lsl #1]\n",
+  "   c:	f851 0032 	ldr.w	r0, [r1, r2, lsl #3]\n",
+  "  10:	f841 0032 	str.w	r0, [r1, r2, lsl #3]\n",
+  "  14:	f851 8002 	ldr.w	r8, [r1, r2]\n",
+  "  18:	f841 8002 	str.w	r8, [r1, r2]\n",
+  "  1c:	f858 1002 	ldr.w	r1, [r8, r2]\n",
+  "  20:	f848 2002 	str.w	r2, [r8, r2]\n",
+  "  24:	f851 0008 	ldr.w	r0, [r1, r8]\n",
+  "  28:	f841 0008 	str.w	r0, [r1, r8]\n",
+  nullptr
+};
+const char* LoadStoreLiteralResults[] = {
+  "   0:   4801            ldr     r0, [pc, #4]    ; (8 <LoadStoreLiteral+0x8>)\n",
+  "   2:   f8cf 0004       str.w   r0, [pc, #4]    ; 8 <LoadStoreLiteral+0x8>\n",
+  "   6:   f85f 0008       ldr.w   r0, [pc, #-8]   ; 0 <LoadStoreLiteral>\n",
+  "   a:   f84f 0008       str.w   r0, [pc, #-8]   ; 4 <LoadStoreLiteral+0x4>\n",
+  "   e:   48ff            ldr     r0, [pc, #1020] ; (40c <LoadStoreLiteral+0x40c>)\n",
+  "  10:   f8df 07ff       ldr.w   r0, [pc, #2047] ; 813 <LoadStoreLiteral+0x813>\n",
+  "  14:   f8cf 03ff       str.w   r0, [pc, #1023] ; 417 <LoadStoreLiteral+0x417>\n",
+  "  18:   f8cf 07ff       str.w   r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n",
+  nullptr
+};
 std::map<std::string, const char**> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -4785,4 +4842,7 @@
     test_results["CompareAndBranchRelocation16"] = CompareAndBranchRelocation16Results;
     test_results["CompareAndBranchRelocation32"] = CompareAndBranchRelocation32Results;
     test_results["MixedBranch32"] = MixedBranch32Results;
+    test_results["Shifts"] = ShiftsResults;
+    test_results["LoadStoreRegOffset"] = LoadStoreRegOffsetResults;
+    test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
 }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index d368e41..e5bc7c8 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -155,7 +155,7 @@
     }
   }
   *error_msg = StringPrintf("Expected valid zip or dex file: '%s'", filename);
-  return nullptr;
+  return false;
 }
 
 int DexFile::GetPermissions() const {
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 6bbe642..ee99e78 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -147,23 +147,73 @@
   dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
 }
 
-static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
-                                          jobject javaDst, jint dstPos, jint count) {
+// Template to convert general array to that of its specific primitive type.
+template <typename T>
+inline T* AsPrimitiveArray(mirror::Array* array) {
+  return down_cast<T*>(array);
+}
+
+template <typename T, Primitive::Type kPrimType>
+inline void System_arraycopyTUnchecked(JNIEnv* env, jobject javaSrc, jint srcPos,
+                                       jobject javaDst, jint dstPos, jint count) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
   mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
-  DCHECK(srcObject != nullptr);
   DCHECK(dstObject != nullptr);
   mirror::Array* srcArray = srcObject->AsArray();
   mirror::Array* dstArray = dstObject->AsArray();
-  DCHECK_GE(srcPos, 0);
-  DCHECK_GE(dstPos, 0);
   DCHECK_GE(count, 0);
-  DCHECK_LE(srcPos + count, srcArray->GetLength());
-  DCHECK_LE(dstPos + count, dstArray->GetLength());
   DCHECK_EQ(srcArray->GetClass(), dstArray->GetClass());
-  DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), Primitive::kPrimChar);
-  dstArray->AsCharArray()->Memmove(dstPos, srcArray->AsCharArray(), srcPos, count);
+  DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), kPrimType);
+  AsPrimitiveArray<T>(dstArray)->Memmove(dstPos, AsPrimitiveArray<T>(srcArray), srcPos, count);
+}
+
+static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::CharArray, Primitive::kPrimChar>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyByteUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::ByteArray, Primitive::kPrimByte>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyShortUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                           jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::ShortArray, Primitive::kPrimShort>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyIntUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                         jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::IntArray, Primitive::kPrimInt>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyLongUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::LongArray, Primitive::kPrimLong>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyFloatUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                           jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::FloatArray, Primitive::kPrimFloat>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyDoubleUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                            jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::DoubleArray, Primitive::kPrimDouble>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyBooleanUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                             jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::BooleanArray, Primitive::kPrimBoolean>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
 }
 
 static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
@@ -178,6 +228,13 @@
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(System, arraycopy, "!(Ljava/lang/Object;ILjava/lang/Object;II)V"),
   NATIVE_METHOD(System, arraycopyCharUnchecked, "!([CI[CII)V"),
+  NATIVE_METHOD(System, arraycopyByteUnchecked, "!([BI[BII)V"),
+  NATIVE_METHOD(System, arraycopyShortUnchecked, "!([SI[SII)V"),
+  NATIVE_METHOD(System, arraycopyIntUnchecked, "!([II[III)V"),
+  NATIVE_METHOD(System, arraycopyLongUnchecked, "!([JI[JII)V"),
+  NATIVE_METHOD(System, arraycopyFloatUnchecked, "!([FI[FII)V"),
+  NATIVE_METHOD(System, arraycopyDoubleUnchecked, "!([DI[DII)V"),
+  NATIVE_METHOD(System, arraycopyBooleanUnchecked, "!([ZI[ZII)V"),
   NATIVE_METHOD(System, identityHashCode, "!(Ljava/lang/Object;)I"),
 };
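
The java_lang_System.cc rewrite above replaces seven near-identical unchecked copy helpers with one template plus thin per-type wrappers, so the JNI registration table stays explicit while the element move logic lives in one place. A stripped-down sketch of the same pattern (plain C++, not the ART mirror types):

#include <cstdint>
#include <cstring>

// One template performs the element-typed move; arraycopy semantics allow
// overlapping ranges, hence memmove rather than memcpy.
template <typename T>
void ArraycopyUnchecked(T* dst, int dst_pos, const T* src, int src_pos, int count) {
  std::memmove(dst + dst_pos, src + src_pos, count * sizeof(T));
}

// Thin per-type wrappers mirror System_arraycopy{Byte,Short,Int,...}Unchecked.
void ArraycopyByteUnchecked(int8_t* d, int dp, const int8_t* s, int sp, int n) {
  ArraycopyUnchecked(d, dp, s, sp, n);
}
void ArraycopyLongUnchecked(int64_t* d, int dp, const int64_t* s, int sp, int n) {
  ArraycopyUnchecked(d, dp, s, sp, n);
}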
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index c52549e..d038571 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -534,15 +534,17 @@
     return StringPrintf("%" PRIu64 "%s", whole_part, unit);
   } else {
     static constexpr size_t kMaxDigits = 30;
+    size_t avail_digits = kMaxDigits;
     char fraction_buffer[kMaxDigits];
     char* ptr = fraction_buffer;
     uint64_t multiplier = 10;
     // This infinite loops if fractional part is 0.
-    while (fractional_part * multiplier < divisor) {
+    while (avail_digits > 1 && fractional_part * multiplier < divisor) {
       multiplier *= 10;
       *ptr++ = '0';
+      avail_digits--;
     }
-    sprintf(ptr, "%" PRIu64, fractional_part);
+    snprintf(ptr, avail_digits, "%" PRIu64, fractional_part);
     fraction_buffer[std::min(kMaxDigits - 1, max_fraction_digits)] = '\0';
     return StringPrintf("%" PRIu64 ".%s%s", whole_part, fraction_buffer, unit);
   }
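
The snprintf change above matters because the leading-zero loop and the digits of fractional_part share the same 30-byte buffer; bounding the remaining space, and stopping the zero loop one byte early, keeps the write inside fraction_buffer even for very small fractions. A self-contained sketch of the bounded pattern with illustrative values:

#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  constexpr size_t kMaxDigits = 30;
  char fraction_buffer[kMaxDigits];
  size_t avail_digits = kMaxDigits;
  char* ptr = fraction_buffer;
  // Example: 5 billionths of a unit -> eight leading zeros, then "5".
  uint64_t fractional_part = 5;
  uint64_t divisor = 1000000000ull;
  uint64_t multiplier = 10;
  while (avail_digits > 1 && fractional_part * multiplier < divisor) {
    multiplier *= 10;
    *ptr++ = '0';
    avail_digits--;
  }
  snprintf(ptr, avail_digits, "%" PRIu64, fractional_part);  // NUL always fits.
  printf("fraction = %s\n", fraction_buffer);                // prints 000000005
  return 0;
}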
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index 3cf9f61..b11efb4 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -118,9 +118,6 @@
   $(call define-test-art-oat-rule-target,$(1),$(2),$$(optimizing_test_rule), \
     -Xcompiler-option --compiler-backend=Optimizing)
 
-  # Mark all tests with the optimizing compiler broken. TODO: fix.
-  ART_TEST_KNOWN_BROKEN += $$(optimizing_test_rule)
-
   ART_TEST_TARGET_OAT_OPTIMIZING$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += $$(optimizing_test_rule)
   ART_TEST_TARGET_OAT_OPTIMIZING_RULES += $$(optimizing_test_rule)
   ART_TEST_TARGET_OAT_OPTIMIZING_$(1)_RULES += $$(optimizing_test_rule)
@@ -234,9 +231,6 @@
   optimizing_test_rule := test-art-host-oat-optimizing-$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
   $(call define-test-art-oat-rule-host,$(1),$(2),$$(optimizing_test_rule),--compiler-backend=Optimizing,)
 
-  # Mark all tests with the optimizing compiler broken. TODO: fix.
-  ART_TEST_KNOWN_BROKEN += $$(optimizing_test_rule)
-
   ART_TEST_HOST_OAT_OPTIMIZING$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(optimizing_test_rule)
   ART_TEST_HOST_OAT_OPTIMIZING_RULES += $$(optimizing_test_rule)
   ART_TEST_HOST_OAT_OPTIMIZING_$(1)_RULES += $$(optimizing_test_rule)
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c2ff98f..13e967c 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -177,11 +177,6 @@
 	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
 	  rm -r $(ART_HOST_TEST_DIR)) || true
 
-  # Mark all tests with the optimizing compiler broken. TODO: fix.
-  ifeq ($(3),optimizing)
-    ART_TEST_KNOWN_BROKEN += $$(run_test_rule_name)
-  endif
-
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)$(4)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_$(1)_RULES += $$(run_test_rule_name)