ARM: Implement reverse bits and bytes intrinsics.

 - IntegerReverse
 - LongReverse
 - IntegerReverseBytes
 - LongReverseBytes
 - ShortReverseBytes

Change-Id: I3ec202696b245148a0237ff6e46ac3f1a3f8402a
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index ea8669f..8cbdcbb 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1825,6 +1825,90 @@
   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
 }
 
+void IntrinsicLocationsBuilderARM::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // Codegen reads one register in, one register out.
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerReverse(HInvoke* invoke) {
+  // IntegerReverse intrinsic: one RBIT from the input register into the output register.
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* const summary = invoke->GetLocations();
+  const Register src = summary->InAt(0).AsRegister<Register>();
+  const Register dst = summary->Out().AsRegister<Register>();
+
+  __ rbit(dst, src);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongReverse(HInvoke* invoke) {
+  // NOTE(review): kOutputOverlap presumably keeps the output pair off the input pair.
+  LocationSummary* const summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongReverse(HInvoke* invoke) {
+  // LongReverse intrinsic: RBIT each 32-bit half and swap the halves while doing so.
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* const summary = invoke->GetLocations();
+  const Register dst_lo = summary->Out().AsRegisterPairLow<Register>();
+  const Register dst_hi = summary->Out().AsRegisterPairHigh<Register>();
+  const Register src_lo = summary->InAt(0).AsRegisterPairLow<Register>();
+  const Register src_hi = summary->InAt(0).AsRegisterPairHigh<Register>();
+
+  __ rbit(dst_lo, src_hi);
+  __ rbit(dst_hi, src_lo);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // Codegen reads one register in, one register out.
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerReverseBytes(HInvoke* invoke) {
+  // IntegerReverseBytes intrinsic: one REV from the input register into the output register.
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* const summary = invoke->GetLocations();
+  const Register src = summary->InAt(0).AsRegister<Register>();
+  const Register dst = summary->Out().AsRegister<Register>();
+
+  __ rev(dst, src);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongReverseBytes(HInvoke* invoke) {
+  // NOTE(review): kOutputOverlap presumably keeps the output pair off the input pair.
+  LocationSummary* const summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongReverseBytes(HInvoke* invoke) {
+  // LongReverseBytes intrinsic: REV each 32-bit half and swap the halves while doing so.
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* const summary = invoke->GetLocations();
+  const Register dst_lo = summary->Out().AsRegisterPairLow<Register>();
+  const Register dst_hi = summary->Out().AsRegisterPairHigh<Register>();
+  const Register src_lo = summary->InAt(0).AsRegisterPairLow<Register>();
+  const Register src_hi = summary->InAt(0).AsRegisterPairHigh<Register>();
+
+  __ rev(dst_lo, src_hi);
+  __ rev(dst_hi, src_lo);
+}
+
+void IntrinsicLocationsBuilderARM::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // Codegen reads one register in, one register out.
+}
+
+void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) {
+  // ShortReverseBytes intrinsic: one REVSH from the input register into the output register.
+  ArmAssembler* assembler = GetAssembler();
+  LocationSummary* const summary = invoke->GetLocations();
+  const Register src = summary->InAt(0).AsRegister<Register>();
+  const Register dst = summary->Out().AsRegister<Register>();
+
+  __ revsh(dst, src);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -1834,12 +1918,7 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
 UNIMPLEMENTED_INTRINSIC(LongBitCount)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index f96376d..a894565 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -545,6 +545,9 @@
   virtual void movw(Register rd, uint16_t imm16, Condition cond = AL) = 0;
   virtual void movt(Register rd, uint16_t imm16, Condition cond = AL) = 0;
   virtual void rbit(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void rev(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void rev16(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void revsh(Register rd, Register rm, Condition cond = AL) = 0;
 
   // Multiply instructions.
   virtual void mul(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index ebca25b..0a227b2 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -750,6 +750,35 @@
 }
 
 
+void Arm32Assembler::EmitMiscellaneous(Condition cond, uint8_t op1,
+                                       uint8_t op2, uint32_t a_part,
+                                       uint32_t rest) {
+  // Bits 26, 25, 23 and 4 are always set. Fields: cond at kConditionShift, op1 at bit 20,
+  // a_part at bit 16, op2 at bit 5. `rest` is OR-ed in unshifted.
+  const int32_t cond_bits = static_cast<int32_t>(cond) << kConditionShift;
+  const int32_t fixed_bits = B26 | B25 | B23 | B4;
+  const uint32_t field_bits = (op1 << 20) | (a_part << 16) | (op2 << 5) | rest;
+
+  int32_t encoding = cond_bits | fixed_bits | field_bits;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::EmitReverseBytes(Register rd, Register rm, Condition cond,
+                                      uint8_t op1, uint8_t op2) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  // Operand word: rd at kRdShift, bits 11..8 all ones, rm unshifted; a_part is all ones too.
+  const int32_t rm_bits = static_cast<int32_t>(rm);
+  const int32_t rd_bits = static_cast<int32_t>(rd) << kRdShift;
+  const int32_t operands = rd_bits | (0xf << 8) | rm_bits;
+  EmitMiscellaneous(cond, op1, op2, 0xf, operands);
+}
+
+
 void Arm32Assembler::rbit(Register rd, Register rm, Condition cond) {
   CHECK_NE(rd, kNoRegister);
   CHECK_NE(rm, kNoRegister);
@@ -764,6 +793,21 @@
 }
 
 
+void Arm32Assembler::rev(Register rd, Register rm, Condition cond) {
+  EmitReverseBytes(rd, rm, cond, 0b011, 0b001);  // op1 = 0b011, op2 = 0b001 select REV.
+}
+
+
+void Arm32Assembler::rev16(Register rd, Register rm, Condition cond) {
+  EmitReverseBytes(rd, rm, cond, 0b011, 0b101);  // op1 = 0b011, op2 = 0b101 select REV16.
+}
+
+
+void Arm32Assembler::revsh(Register rd, Register rm, Condition cond) {
+  EmitReverseBytes(rd, rm, cond, 0b111, 0b101);  // op1 = 0b111, op2 = 0b101 select REVSH.
+}
+
+
 void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode,
                                Register rd, Register rn,
                                Register rm, Register rs) {
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index bf332fe..e3e05ca 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -91,6 +91,9 @@
   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   // Multiply instructions.
   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
@@ -388,6 +391,11 @@
 
   void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
 
+  void EmitMiscellaneous(Condition cond, uint8_t op1, uint8_t op2,
+                         uint32_t a_part, uint32_t rest);  // Fields are OR-ed into fixed bits.
+  void EmitReverseBytes(Register rd, Register rm, Condition cond,
+                        uint8_t op1, uint8_t op2);  // Shared by rev, rev16 and revsh.
+
   void EmitBranch(Condition cond, Label* label, bool link);
   static int32_t EncodeBranchOffset(int offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index 4380596..e570e22 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -887,4 +887,16 @@
   T3Helper(&arm::Arm32Assembler::rbit, true, "rbit{cond} {reg1}, {reg2}", "rbit");
 }
 
+TEST_F(AssemblerArm32Test, rev) {  // Same T3Helper pattern as the rbit test.
+  T3Helper(&arm::Arm32Assembler::rev, true, "rev{cond} {reg1}, {reg2}", "rev");
+}
+
+TEST_F(AssemblerArm32Test, rev16) {  // Same T3Helper pattern as the rbit test.
+  T3Helper(&arm::Arm32Assembler::rev16, true, "rev16{cond} {reg1}, {reg2}", "rev16");
+}
+
+TEST_F(AssemblerArm32Test, revsh) {  // Same T3Helper pattern as the rbit test.
+  T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 52023a6..15298b3 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2569,20 +2569,36 @@
 }
 
 
+void Thumb2Assembler::Emit32Miscellaneous(uint8_t op1,
+                                          uint8_t op2,
+                                          uint32_t rest_encoding) {
+  // Always-set bits: 31..27, 25, 23, 15..12 and 7. op1 goes at bit 20, op2 at bit 4, and
+  // rest_encoding is OR-ed in unshifted.
+  const int32_t fixed_bits = B31 | B30 | B29 | B28 | B27 | B25 | B23 | (0xf << 12) | B7;
+  const int32_t selector_bits = (op1 << 20) | (op2 << 4);
+
+  int32_t encoding = fixed_bits | selector_bits | rest_encoding;
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::Emit16Miscellaneous(uint32_t rest_encoding) {
+  // Bits 15, 13 and 12 are always set; rest_encoding is OR-ed in unshifted.
+  const uint32_t halfword = B15 | B13 | B12 | rest_encoding;
+  Emit16(static_cast<int16_t>(halfword));
+}
+
 void Thumb2Assembler::clz(Register rd, Register rm, Condition cond) {
   CHECK_NE(rd, kNoRegister);
   CHECK_NE(rm, kNoRegister);
   CheckCondition(cond);
   CHECK_NE(rd, PC);
   CHECK_NE(rm, PC);
-  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
-      B25 | B23 | B21 | B20 |
+  int32_t encoding =  // Register fields only; fixed bits are added by Emit32Miscellaneous.
       static_cast<uint32_t>(rm) << 16 |
-      0xf << 12 |
       static_cast<uint32_t>(rd) << 8 |
-      B7 |
       static_cast<uint32_t>(rm);
-  Emit32(encoding);
+  Emit32Miscellaneous(0b11, 0b00, encoding);  // op1 = 0b11 sets bits 21 and 20.
 }
 
 
@@ -2630,14 +2646,55 @@
   CHECK_NE(rm, PC);
   CHECK_NE(rd, SP);
   CHECK_NE(rm, SP);
-  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
-      B25 | B23 | B20 |
+  int32_t encoding =
       static_cast<uint32_t>(rm) << 16 |
-      0xf << 12 |
       static_cast<uint32_t>(rd) << 8 |
-      B7 | B5 |
       static_cast<uint32_t>(rm);
-  Emit32(encoding);
+
+  Emit32Miscellaneous(0b01, 0b10, encoding);
+}
+
+
+void Thumb2Assembler::EmitReverseBytes(Register rd, Register rm,
+                                       uint32_t op) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  CHECK_NE(rd, SP);
+  CHECK_NE(rm, SP);
+
+  // The 32-bit form is needed for high registers or when force_32bit_ is set.
+  if (IsHighRegister(rd) || IsHighRegister(rm) || force_32bit_) {
+    // rm is encoded twice (shifted by 16 and unshifted); rd is shifted by 8; op is op2.
+    const uint32_t rm_bits = static_cast<uint32_t>(rm);
+    int32_t wide = rm_bits << 16 | static_cast<uint32_t>(rd) << 8 | rm_bits;
+    Emit32Miscellaneous(0b01, op, wide);
+    return;
+  }
+
+  // 16-bit form: bits 11 and 9 set, op shifted by 6, rm shifted by 3, rd unshifted.
+  uint16_t t1_op = B11 | B9 | (op << 6);
+  int16_t narrow = t1_op | static_cast<uint16_t>(rm) << 3 | static_cast<uint16_t>(rd);
+  Emit16Miscellaneous(narrow);
+}
+
+
+void Thumb2Assembler::rev(Register rd, Register rm, Condition cond) {
+  CheckCondition(cond);  // cond is only checked; it is not passed on to the encoder.
+  EmitReverseBytes(rd, rm, 0b00);  // op = 0b00 selects REV.
+}
+
+
+void Thumb2Assembler::rev16(Register rd, Register rm, Condition cond) {
+  CheckCondition(cond);  // cond is only checked; it is not passed on to the encoder.
+  EmitReverseBytes(rd, rm, 0b01);  // op = 0b01 selects REV16.
+}
+
+
+void Thumb2Assembler::revsh(Register rd, Register rm, Condition cond) {
+  CheckCondition(cond);  // cond is only checked; it is not passed on to the encoder.
+  EmitReverseBytes(rd, rm, 0b11);  // op = 0b11 selects REVSH.
+}
 
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index bf07b2d..6b61aca 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -117,6 +117,9 @@
   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   // Multiply instructions.
   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
@@ -644,6 +647,17 @@
                           Register rd,
                           const ShifterOperand& so);
 
+  // Emit a single 32-bit miscellaneous instruction; op1, op2 and rest_encoding are OR-ed in.
+  void Emit32Miscellaneous(uint8_t op1,
+                           uint8_t op2,
+                           uint32_t rest_encoding);
+
+  // Emit a reverse-bytes instruction; `op` selects rev (0b00), rev16 (0b01) or revsh (0b11).
+  void EmitReverseBytes(Register rd, Register rm, uint32_t op);
+
+  // Emit a single 16-bit miscellaneous instruction; rest_encoding is OR-ed in unshifted.
+  void Emit16Miscellaneous(uint32_t rest_encoding);
+
   // Must the instruction be 32 bits or can it possibly be encoded
   // in 16 bits?
   bool Is32BitDataProcessing(Condition cond,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 7b32b0f..650b089 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1331,4 +1331,28 @@
   DriverStr(expected, "rbit");
 }
 
+TEST_F(AssemblerThumb2Test, rev) {
+  // Operands: rd = r1, rm = r0.
+  const char* const expected_asm = "rev r1, r0\n";
+
+  __ rev(arm::R1, arm::R0);
+  DriverStr(expected_asm, "rev");
+}
+
+TEST_F(AssemblerThumb2Test, rev16) {
+  // Operands: rd = r1, rm = r0.
+  const char* const expected_asm = "rev16 r1, r0\n";
+
+  __ rev16(arm::R1, arm::R0);
+  DriverStr(expected_asm, "rev16");
+}
+
+TEST_F(AssemblerThumb2Test, revsh) {
+  // Operands: rd = r1, rm = r0.
+  const char* const expected_asm = "revsh r1, r0\n";
+
+  __ revsh(arm::R1, arm::R0);
+  DriverStr(expected_asm, "revsh");
+}
+
 }  // namespace art