[optimizing compiler] Add shifts

Added SHL, SHR and USHR support for arm, x86 and x86_64.
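
Int shifts are generated inline; on arm the shift count is explicitly
masked with kMaxIntShiftValue since the instruction does not do it,
and constant folding masks in HShl/HShr/HUShr::Evaluate(). Long shifts
use the pShlLong/pShrLong/pUshrLong runtime entry points on arm, an
inline shld/shrd sequence with a fix-up for counts >= 32 on x86, and
new 64-bit shlq/sarq/shrq assembler instructions on x86_64. arm64
leaves Shl/Shr/UShr unimplemented for now.

A rough, illustrative sketch (not part of this patch; helper names are
made up) of the dex shift semantics the new HShl/HShr/HUShr nodes and
their Evaluate() methods implement:

  // Only the low 5 bits of an int shift count and the low 6 bits of a
  // long shift count are used.
  static int shlInt(int value, int count) {
    return value << (count & 0x1f);   // same result as 'value << count' in Java
  }
  static long ushrLong(long value, long count) {
    return value >>> (count & 0x3f);  // same result as 'value >>> count' in Java
  }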

Change-Id: I971f594e270179457e6958acf1401ff7630df07e
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index be8631a..b261460 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -305,6 +305,15 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
+                                    Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
   HInstruction* second = LoadLocal(instruction.VRegB(), type);
@@ -313,6 +322,14 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction,
                               Primitive::Type type,
                               uint32_t dex_pc) {
@@ -1141,6 +1158,36 @@
       break;
     }
 
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::OR_INT: {
       Binop_23x<HOr>(instruction, Primitive::kPrimInt);
       break;
@@ -1240,6 +1287,36 @@
       break;
     }
 
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::DIV_FLOAT_2ADDR: {
       Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
@@ -1354,6 +1431,21 @@
       break;
     }
 
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false);
+      break;
+    }
+
     case Instruction::NEW_INSTANCE: {
       current_block_->AddInstruction(
           new (arena_) HNewInstance(dex_pc, instruction.VRegB_21c()));
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 897bcec..204005d 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -105,12 +105,18 @@
   void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
 
   template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
   void Binop_22b(const Instruction& instruction, bool reverse);
 
   template<typename T>
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1701ef5..a204e21 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2087,6 +2087,124 @@
   }
 }
 
+void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary::CallKind call_kind = op->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(op, call_kind);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      // The runtime helper puts the output in R0,R2.
+      locations->SetOut(Location::RegisterPairLocation(R0, R2));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  Primitive::Type type = op->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      Register out_reg = out.As<Register>();
+      Register first_reg = first.As<Register>();
+      // Arm doesn't mask the shift count so we need to do it ourselves.
+      if (second.IsRegister()) {
+        Register second_reg = second.As<Register>();
+        __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
+        if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, second_reg);
+        } else {
+          __ Lsr(out_reg, first_reg, second_reg);
+        }
+      } else {
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
+        if (shift_value == 0) {  // arm does not support shifting with 0 immediate.
+          __ Mov(out_reg, first_reg);
+        } else if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, shift_value);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, shift_value);
+        } else {
+          __ Lsr(out_reg, first_reg, shift_value);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      // TODO: Inline the assembly instead of calling the runtime.
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.As<Register>());
+      DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+
+      int32_t entry_point_offset;
+      if (op->IsShl()) {
+        entry_point_offset = QUICK_ENTRY_POINT(pShlLong);
+      } else if (op->IsShr()) {
+        entry_point_offset = QUICK_ENTRY_POINT(pShrLong);
+      } else {
+        entry_point_offset = QUICK_ENTRY_POINT(pUshrLong);
+      }
+      __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
+      __ blx(LR);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+  }
+}
+
+void LocationsBuilderARM::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARM::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderARM::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARM::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderARM::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARM::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c00fac1..226e635 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -109,6 +109,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   CodeGeneratorARM* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,6 +137,7 @@
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   ArmAssembler* const assembler_;
   CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 82dced5..7a8b941 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -801,7 +801,10 @@
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
   M(ParallelMove)                                          \
-  M(Rem)
+  M(Rem)                                                   \
+  M(Shl)                                                   \
+  M(Shr)                                                   \
+  M(UShr)                                                  \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3c53cea..917b7dd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2129,6 +2129,139 @@
   }
 }
 
+void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::RegisterLocation(ECX));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  DCHECK(first.Equals(locations->Out()));
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      Register first_reg = first.As<Register>();
+      if (second.IsRegister()) {
+        Register second_reg = second.As<Register>();
+        DCHECK_EQ(ECX, second_reg);
+        if (op->IsShl()) {
+          __ shll(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, second_reg);
+        } else {
+          __ shrl(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shll(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, imm);
+        } else {
+          __ shrl(first_reg, imm);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      Register second_reg = second.As<Register>();
+      DCHECK_EQ(ECX, second_reg);
+      if (op->IsShl()) {
+        GenerateShlLong(first, second_reg);
+      } else if (op->IsShr()) {
+        GenerateShrLong(first, second_reg);
+      } else {
+        GenerateUShrLong(first, second_reg);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected op type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
+  __ shll(loc.AsRegisterPairLow<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
+  __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
+  __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
+  __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
+  __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
+  __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
+  Label done;
+  __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
+  __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
+  __ testl(shifter, Immediate(32));
+  __ j(kEqual, &done);
+  __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
+  __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
+  __ Bind(&done);
+}
+
+void LocationsBuilderX86::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderX86::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0aff6cc..aed06c0 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -104,6 +104,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleShift(HBinaryOperation* instruction);
 
   CodeGeneratorX86* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -132,6 +133,10 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleShift(HBinaryOperation* instruction);
+  void GenerateShlLong(const Location& loc, Register shifter);
+  void GenerateShrLong(const Location& loc, Register shifter);
+  void GenerateUShrLong(const Location& loc, Register shifter);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 97f5e5c..69f031a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2026,6 +2026,107 @@
   }
 }
 
+void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).As<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.As<CpuRegister>();
+        if (op->IsShl()) {
+          __ shll(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, second_reg);
+        } else {
+          __ shrl(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shll(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarl(first_reg, imm);
+        } else {
+          __ shrl(first_reg, imm);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.As<CpuRegister>();
+        if (op->IsShl()) {
+          __ shlq(first_reg, second_reg);
+        } else if (op->IsShr()) {
+          __ sarq(first_reg, second_reg);
+        } else {
+          __ shrq(first_reg, second_reg);
+        }
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        if (op->IsShl()) {
+          __ shlq(first_reg, imm);
+        } else if (op->IsShr()) {
+          __ sarq(first_reg, imm);
+        } else {
+          __ shrq(first_reg, imm);
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderX86_64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 29c679d..794b81f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -108,6 +108,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
 
   CodeGeneratorX86_64* const codegen_;
   InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,6 +137,7 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleShift(HBinaryOperation* operation);
 
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7d52d7d..b47549a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -42,6 +42,9 @@
 static const int kDefaultNumberOfDominatedBlocks = 1;
 static const int kDefaultNumberOfBackEdges = 1;
 
+static constexpr uint32_t kMaxIntShiftValue = 0x1f;
+static constexpr uint64_t kMaxLongShiftValue = 0x3f;
+
 enum IfCondition {
   kCondEQ,
   kCondNE,
@@ -521,9 +524,11 @@
   M(ParallelMove, Instruction)                                          \
   M(ParameterValue, Instruction)                                        \
   M(Phi, Instruction)                                                   \
-  M(Rem, BinaryOperation)                                             \
+  M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Shl, BinaryOperation)                                               \
+  M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
   M(StaticFieldSet, Instruction)                                        \
   M(StoreLocal, Instruction)                                            \
@@ -532,6 +537,7 @@
   M(Temporary, Instruction)                                             \
   M(Throw, Instruction)                                                 \
   M(TypeConversion, Instruction)                                        \
+  M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
 #define FOR_EACH_INSTRUCTION(M)                                         \
@@ -1831,6 +1837,57 @@
   DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck);
 };
 
+class HShl : public HBinaryOperation {
+ public:
+  HShl(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x << (y & kMaxIntShiftValue); }
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x << (y & kMaxLongShiftValue); }
+
+  DECLARE_INSTRUCTION(Shl);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShl);
+};
+
+class HShr : public HBinaryOperation {
+ public:
+  HShr(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x >> (y & kMaxIntShiftValue); }
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x >> (y & kMaxLongShiftValue); }
+
+  DECLARE_INSTRUCTION(Shr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShr);
+};
+
+class HUShr : public HBinaryOperation {
+ public:
+  HUShr(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    uint32_t ux = static_cast<uint32_t>(x);
+    uint32_t uy = static_cast<uint32_t>(y) & kMaxIntShiftValue;
+    return static_cast<int32_t>(ux >> uy);
+  }
+
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    uint64_t ux = static_cast<uint64_t>(x);
+    uint64_t uy = static_cast<uint64_t>(y) & kMaxLongShiftValue;
+    return static_cast<int64_t>(ux >> uy);
+  }
+
+  DECLARE_INSTRUCTION(UShr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HUShr);
+};
+
 class HAnd : public HBinaryOperation {
  public:
   HAnd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a1594b0..a541763 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1079,7 +1079,7 @@
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  CHECK_LE(shift_imm, 31u);
   if (setcc) {
     movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
   } else {
@@ -1090,7 +1090,7 @@
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   if (setcc) {
     movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
@@ -1102,7 +1102,7 @@
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   if (setcc) {
     movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
@@ -1114,7 +1114,7 @@
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                          bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  CHECK(1u <= shift_imm && shift_imm <= 31u);
   if (setcc) {
     movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
   } else {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a349209..a377cb2 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2210,7 +2210,7 @@
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  CHECK_LE(shift_imm, 31u);
   CheckCondition(cond);
   EmitShift(rd, rm, LSL, shift_imm, setcc);
 }
@@ -2218,7 +2218,7 @@
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   CheckCondition(cond);
   EmitShift(rd, rm, LSR, shift_imm, setcc);
@@ -2227,7 +2227,7 @@
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  CHECK(1u <= shift_imm && shift_imm <= 32u);
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
   CheckCondition(cond);
   EmitShift(rd, rm, ASR, shift_imm, setcc);
@@ -2236,7 +2236,7 @@
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                           bool setcc, Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  CHECK(1u <= shift_imm && shift_imm <= 31u);
   CheckCondition(cond);
   EmitShift(rd, rm, ROR, shift_imm, setcc);
 }
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 9d3fa01..54c931d 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -223,6 +223,10 @@
     UNREACHABLE();
   }
 
+  std::string GetRegisterName(const Reg& reg) {
+    return GetRegName<RegisterView::kUsePrimaryName>(reg);
+  }
+
  protected:
   explicit AssemblerTest() {}
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index afa4a3b..a297ea3 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1126,7 +1126,8 @@
 }
 
 
-void X86Assembler::shld(Register dst, Register src) {
+void X86Assembler::shld(Register dst, Register src, Register shifter) {
+  DCHECK_EQ(ECX, shifter);
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0xA5);
@@ -1134,6 +1135,15 @@
 }
 
 
+void X86Assembler::shrd(Register dst, Register src, Register shifter) {
+  DCHECK_EQ(ECX, shifter);
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAD);
+  EmitRegisterOperand(src, dst);
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8aed934..6ea66a5 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -405,7 +405,8 @@
   void shrl(Register operand, Register shifter);
   void sarl(Register reg, const Immediate& imm);
   void sarl(Register operand, Register shifter);
-  void shld(Register dst, Register src);
+  void shld(Register dst, Register src, Register shifter);
+  void shrd(Register dst, Register src, Register shifter);
 
   void negl(Register reg);
   void notl(Register reg);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 8c428f4..dff3849 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1451,8 +1451,18 @@
 }
 
 
+void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 4, reg, imm);
+}
+
+
 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(4, operand, shifter);
+  EmitGenericShift(false, 4, operand, shifter);
+}
+
+
+void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 4, operand, shifter);
 }
 
 
@@ -1467,7 +1477,12 @@
 
 
 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(5, operand, shifter);
+  EmitGenericShift(false, 5, operand, shifter);
+}
+
+
+void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 5, operand, shifter);
 }
 
 
@@ -1477,7 +1492,17 @@
 
 
 void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
-  EmitGenericShift(7, operand, shifter);
+  EmitGenericShift(false, 7, operand, shifter);
+}
+
+
+void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 7, reg, imm);
+}
+
+
+void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 7, operand, shifter);
 }
 
 
@@ -1826,12 +1851,17 @@
 }
 
 
-void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
+void X86_64Assembler::EmitGenericShift(bool wide,
+                                       int reg_or_opcode,
                                        CpuRegister operand,
                                        CpuRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter.AsRegister(), RCX);
-  EmitOptionalRex32(operand);
+  if (wide) {
+    EmitRex64(operand);
+  } else {
+    EmitOptionalRex32(operand);
+  }
   EmitUint8(0xD3);
   EmitOperand(reg_or_opcode, Operand(operand));
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 4dd70e2..ab1bc9e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -460,7 +460,12 @@
   void sarl(CpuRegister reg, const Immediate& imm);
   void sarl(CpuRegister operand, CpuRegister shifter);
 
+  void shlq(CpuRegister reg, const Immediate& imm);
+  void shlq(CpuRegister operand, CpuRegister shifter);
   void shrq(CpuRegister reg, const Immediate& imm);
+  void shrq(CpuRegister operand, CpuRegister shifter);
+  void sarq(CpuRegister reg, const Immediate& imm);
+  void sarq(CpuRegister operand, CpuRegister shifter);
 
   void negl(CpuRegister reg);
   void negq(CpuRegister reg);
@@ -657,7 +662,7 @@
   void EmitNearLabelLink(Label* label);
 
   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
-  void EmitGenericShift(int rm, CpuRegister operand, CpuRegister shifter);
+  void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
 
   // If any input is not false, output the necessary rex prefix.
   void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index af389e6..14a98b9 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -296,7 +296,7 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::subl, 4U, "sub ${imm}, %{reg}"), "subli");
 }
 
-// Shll only allows CL as the shift register.
+// Shll only allows CL as the shift count.
 std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -319,7 +319,31 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::shll, 1U, "shll ${imm}, %{reg}"), "shlli");
 }
 
-// Shrl only allows CL as the shift register.
+// Shlq only allows CL as the shift count.
+std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shlq(*reg, shifter);
+    str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+  printf("%s\n", str.str().c_str());
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShlqReg) {
+  DriverFn(&shlq_fn, "shlq");
+}
+
+TEST_F(AssemblerX86_64Test, ShlqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::shlq, 1U, "shlq ${imm}, %{reg}"), "shlqi");
+}
+
+// Shrl only allows CL as the shift count.
 std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -342,7 +366,30 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::shrl, 1U, "shrl ${imm}, %{reg}"), "shrli");
 }
 
-// Sarl only allows CL as the shift register.
+// Shrq only allows CL as the shift count.
+std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shrq(*reg, shifter);
+    str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShrqReg) {
+  DriverFn(&shrq_fn, "shrq");
+}
+
+TEST_F(AssemblerX86_64Test, ShrqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::shrq, 1U, "shrq ${imm}, %{reg}"), "shrqi");
+}
+
+// Sarl only allows CL as the shift count.
 std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
   std::ostringstream str;
 
@@ -365,6 +412,29 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::sarl, 1U, "sarl ${imm}, %{reg}"), "sarli");
 }
 
+// Sarq only allows CL as the shift count.
+std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->sarq(*reg, shifter);
+    str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, SarqReg) {
+  DriverFn(&sarq_fn, "sarq");
+}
+
+TEST_F(AssemblerX86_64Test, SarqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::sarq, 1U, "sarq ${imm}, %{reg}"), "sarqi");
+}
+
 TEST_F(AssemblerX86_64Test, CmpqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::cmpq, "cmpq %{reg2}, %{reg1}"), "cmpq");
 }
diff --git a/test/431-optimizing-arith-shifts/expected.txt b/test/431-optimizing-arith-shifts/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/expected.txt
diff --git a/test/431-optimizing-arith-shifts/info.txt b/test/431-optimizing-arith-shifts/info.txt
new file mode 100644
index 0000000..14ff264
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/info.txt
@@ -0,0 +1 @@
+Tests for shift operations.
diff --git a/test/431-optimizing-arith-shifts/src/Main.java b/test/431-optimizing-arith-shifts/src/Main.java
new file mode 100644
index 0000000..c29eeeb
--- /dev/null
+++ b/test/431-optimizing-arith-shifts/src/Main.java
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    shlInt();
+    shlLong();
+    shrInt();
+    shrLong();
+    ushrInt();
+    ushrLong();
+  }
+
+  private static void shlInt() {
+    expectEquals(48, $opt$ShlConst2(12));
+    expectEquals(12, $opt$ShlConst0(12));
+    expectEquals(-48, $opt$Shl(-12, 2));
+    expectEquals(1024, $opt$Shl(32, 5));
+
+    expectEquals(7, $opt$Shl(7, 0));
+    expectEquals(14, $opt$Shl(7, 1));
+    expectEquals(0, $opt$Shl(0, 30));
+
+    expectEquals(1073741824, $opt$Shl(1, 30));
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, 31));  // overflow
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1073741824, 1));  // overflow
+    expectEquals(1073741824, $opt$Shl(268435456, 2));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$Shl(7, 32));  // 32 & 0x1f = 0
+    expectEquals(14, $opt$Shl(7, 33));  // 33 & 0x1f = 1
+    expectEquals(32, $opt$Shl(1, 101));  // 101 & 0x1f = 5
+
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(1, -1));  // -1 & 0x1f = 31
+    expectEquals(14, $opt$Shl(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$Shl(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-536870912, $opt$Shl(7, -3));  // -3 & 0x1f = 29
+
+    expectEquals(Integer.MIN_VALUE, $opt$Shl(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$Shl(7, Integer.MIN_VALUE));
+  }
+
+  private static void shlLong() {
+    expectEquals(48L, $opt$ShlConst2(12L));
+    expectEquals(12L, $opt$ShlConst0(12L));
+    expectEquals(-48L, $opt$Shl(-12L, 2L));
+    expectEquals(1024L, $opt$Shl(32L, 5L));
+
+    expectEquals(7L, $opt$Shl(7L, 0L));
+    expectEquals(14L, $opt$Shl(7L, 1L));
+    expectEquals(0L, $opt$Shl(0L, 30L));
+
+    expectEquals(1073741824L, $opt$Shl(1L, 30L));
+    expectEquals(2147483648L, $opt$Shl(1L, 31L));
+    expectEquals(2147483648L, $opt$Shl(1073741824L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(4294967296L, $opt$Shl(1L, 32L));
+    expectEquals(60129542144L, $opt$Shl(7L, 33L));
+    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, 63L));  // overflow
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$Shl(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(14L, $opt$Shl(7L, 65L));  // 65 & 0x3f = 1
+    expectEquals(137438953472L, $opt$Shl(1L, 101L));  // 101 & 0x3f = 37
+
+    expectEquals(Long.MIN_VALUE, $opt$Shl(1L, -1L));  // -1 & 0x3f = 63
+    expectEquals(14L, $opt$Shl(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$Shl(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(2305843009213693952L, $opt$Shl(1L, -3L));  // -3 & 0x3f = 61
+
+    expectEquals(Long.MIN_VALUE, $opt$Shl(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$Shl(7L, Long.MIN_VALUE));
+  }
+
+  private static void shrInt() {
+    expectEquals(3, $opt$ShrConst2(12));
+    expectEquals(12, $opt$ShrConst0(12));
+    expectEquals(-3, $opt$Shr(-12, 2));
+    expectEquals(1, $opt$Shr(32, 5));
+
+    expectEquals(7, $opt$Shr(7, 0));
+    expectEquals(3, $opt$Shr(7, 1));
+    expectEquals(0, $opt$Shr(0, 30));
+    expectEquals(0, $opt$Shr(1, 30));
+    expectEquals(-1, $opt$Shr(-1, 30));
+
+    expectEquals(0, $opt$Shr(Integer.MAX_VALUE, 31));
+    expectEquals(-1, $opt$Shr(Integer.MIN_VALUE, 31));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$Shr(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$Shr(7, 33));  // 33 & 0x1f = 1
+
+    expectEquals(0, $opt$Shr(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$Shr(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$Shr(7, -32));  // -32 & 0x1f = 0
+    expectEquals(-4, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0, $opt$Shr(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$Shr(7, Integer.MIN_VALUE));
+  }
+
+  private static void shrLong() {
+    expectEquals(3L, $opt$ShrConst2(12L));
+    expectEquals(12L, $opt$ShrConst0(12L));
+    expectEquals(-3L, $opt$Shr(-12L, 2L));
+    expectEquals(1, $opt$Shr(32, 5));
+
+    expectEquals(7L, $opt$Shr(7L, 0L));
+    expectEquals(3L, $opt$Shr(7L, 1L));
+    expectEquals(0L, $opt$Shr(0L, 30L));
+    expectEquals(0L, $opt$Shr(1L, 30L));
+    expectEquals(-1L, $opt$Shr(-1L, 30L));
+
+
+    expectEquals(1L, $opt$Shr(1073741824L, 30L));
+    expectEquals(1L, $opt$Shr(2147483648L, 31L));
+    expectEquals(1073741824L, $opt$Shr(2147483648L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(1L, $opt$Shr(4294967296L, 32L));
+    expectEquals(7L, $opt$Shr(60129542144L, 33L));
+    expectEquals(0L, $opt$Shr(Long.MAX_VALUE, 63L));
+    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, 63L));
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$Shr(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$Shr(7L, 65L));  // 65 & 0x3f = 1
+
+    expectEquals(-1L, $opt$Shr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$Shr(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$Shr(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$Shr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
+    expectEquals(-4L, $opt$Shr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0L, $opt$Shr(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$Shr(7L, Long.MIN_VALUE));
+  }
+
+  private static void ushrInt() {
+    expectEquals(3, $opt$UShrConst2(12));
+    expectEquals(12, $opt$UShrConst0(12));
+    expectEquals(1073741821, $opt$UShr(-12, 2));
+    expectEquals(1, $opt$UShr(32, 5));
+
+    expectEquals(7, $opt$UShr(7, 0));
+    expectEquals(3, $opt$UShr(7, 1));
+    expectEquals(0, $opt$UShr(0, 30));
+    expectEquals(0, $opt$UShr(1, 30));
+    expectEquals(3, $opt$UShr(-1, 30));
+
+    expectEquals(0, $opt$UShr(Integer.MAX_VALUE, 31));
+    expectEquals(1, $opt$UShr(Integer.MIN_VALUE, 31));
+
+    // Only the 5 lower bits should be used for shifting (& 0x1f).
+    expectEquals(7, $opt$UShr(7, 32));  // 32 & 0x1f = 0
+    expectEquals(3, $opt$UShr(7, 33));  // 33 & 0x1f = 1
+
+    expectEquals(0, $opt$UShr(1, -1));  // -1 & 0x1f = 31
+    expectEquals(3, $opt$UShr(7, -31));  // -31 & 0x1f = 1
+    expectEquals(7, $opt$UShr(7, -32));  // -32 & 0x1f = 0
+    expectEquals(4, $opt$UShr(Integer.MIN_VALUE, -3));  // -3 & 0x1f = 29
+
+    expectEquals(0, $opt$UShr(7, Integer.MAX_VALUE));
+    expectEquals(7, $opt$UShr(7, Integer.MIN_VALUE));
+  }
+
+  private static void ushrLong() {
+    expectEquals(3L, $opt$UShrConst2(12L));
+    expectEquals(12L, $opt$UShrConst0(12L));
+    expectEquals(4611686018427387901L, $opt$UShr(-12L, 2L));
+    expectEquals(1, $opt$UShr(32, 5));
+
+    expectEquals(7L, $opt$UShr(7L, 0L));
+    expectEquals(3L, $opt$UShr(7L, 1L));
+    expectEquals(0L, $opt$UShr(0L, 30L));
+    expectEquals(0L, $opt$UShr(1L, 30L));
+    expectEquals(17179869183L, $opt$UShr(-1L, 30L));
+
+
+    expectEquals(1L, $opt$UShr(1073741824L, 30L));
+    expectEquals(1L, $opt$UShr(2147483648L, 31L));
+    expectEquals(1073741824L, $opt$UShr(2147483648L, 1L));
+
+    // Long shifts can use up to 6 lower bits.
+    expectEquals(1L, $opt$UShr(4294967296L, 32L));
+    expectEquals(7L, $opt$UShr(60129542144L, 33L));
+    expectEquals(0L, $opt$UShr(Long.MAX_VALUE, 63L));
+    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, 63L));
+
+    // Only the 6 lower bits should be used for shifting (& 0x3f).
+    expectEquals(7L, $opt$UShr(7L, 64L));  // 64 & 0x3f = 0
+    expectEquals(3L, $opt$UShr(7L, 65L));  // 65 & 0x3f = 1
+
+    expectEquals(1L, $opt$UShr(Long.MIN_VALUE, -1L));  // -1 & 0x3f = 63
+    expectEquals(3L, $opt$UShr(7L, -63L));  // -63 & 0x3f = 1
+    expectEquals(7L, $opt$UShr(7L, -64L));  // -64 & 0x3f = 0
+    expectEquals(1L, $opt$UShr(2305843009213693952L, -3L));  // -3 & 0x3f = 61
+    expectEquals(4L, $opt$UShr(Long.MIN_VALUE, -3L));  // -3 & 0x3f = 61
+
+    expectEquals(0L, $opt$UShr(7L, Long.MAX_VALUE));
+    expectEquals(7L, $opt$UShr(7L, Long.MIN_VALUE));
+  }
+
+  static int $opt$Shl(int a, int b) {
+    return a << b;
+  }
+
+  static long $opt$Shl(long a, long b) {
+    return a << b;
+  }
+
+  static int $opt$Shr(int a, int b) {
+    return a >> b;
+  }
+
+  static long $opt$Shr(long a, long b) {
+    return a >> b;
+  }
+
+  static int $opt$UShr(int a, int b) {
+    return a >>> b;
+  }
+
+  static long $opt$UShr(long a, long b) {
+    return a >>> b;
+  }
+
+  static int $opt$ShlConst2(int a) {
+    return a << 2;
+  }
+
+  static long $opt$ShlConst2(long a) {
+    return a << 2L;
+  }
+
+  static int $opt$ShrConst2(int a) {
+    return a >> 2;
+  }
+
+  static long $opt$ShrConst2(long a) {
+    return a >> 2L;
+  }
+
+  static int $opt$UShrConst2(int a) {
+    return a >>> 2;
+  }
+
+  static long $opt$UShrConst2(long a) {
+    return a >>> 2L;
+  }
+
+  static int $opt$ShlConst0(int a) {
+    return a << 0;
+  }
+
+  static long $opt$ShlConst0(long a) {
+    return a << 0L;
+  }
+
+  static int $opt$ShrConst0(int a) {
+    return a >> 0;
+  }
+
+  static long $opt$ShrConst0(long a) {
+    return a >> 0L;
+  }
+
+  static int $opt$UShrConst0(int a) {
+    return a >>> 0;
+  }
+
+  static long $opt$UShrConst0(long a) {
+    return a >>> 0L;
+  }
+
+}
+