[optimizing compiler] Add DIV_LONG

- for backends: arm, x86, x86_64
- added cqo, idivq, testq assembly for x64_64
- small cleanups

Change-Id: I762ef37880749038ed25d6014370be9a61795200
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index a3ca061..4e2f1cd 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -295,6 +295,16 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_23x(const Instruction& instruction,
+                              Primitive::Type type,
+                              uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
   HInstruction* second = LoadLocal(instruction.VRegB(), type);
@@ -303,6 +313,16 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_12x(const Instruction& instruction,
+                              Primitive::Type type,
+                              uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse) {
   HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   HInstruction* second = GetIntConstant(instruction.VRegC_22s());
@@ -548,25 +568,37 @@
   return true;
 }
 
-void HGraphBuilder::BuildCheckedDiv(uint16_t out_reg,
-                                    uint16_t first_reg,
-                                    int32_t second_reg,
-                                    uint32_t dex_offset,
+void HGraphBuilder::BuildCheckedDiv(uint16_t out_vreg,
+                                    uint16_t first_vreg,
+                                    int64_t second_vreg_or_constant,
+                                    uint32_t dex_pc,
                                     Primitive::Type type,
-                                    bool second_is_lit) {
-  DCHECK(type == Primitive::kPrimInt);
+                                    bool second_is_constant) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
-  HInstruction* first = LoadLocal(first_reg, type);
-  HInstruction* second = second_is_lit ? GetIntConstant(second_reg) : LoadLocal(second_reg, type);
-  if (!second->IsIntConstant() || (second->AsIntConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_offset);
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = GetIntConstant(second_vreg_or_constant);
+    } else {
+      second = GetLongConstant(second_vreg_or_constant);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
     Temporaries temps(graph_);
     current_block_->AddInstruction(second);
     temps.Add(current_block_->GetLastInstruction());
   }
 
-  current_block_->AddInstruction(new (arena_) HDiv(type, first, second));
-  UpdateLocal(out_reg, current_block_->GetLastInstruction());
+  current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
 }
 
 void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
@@ -1041,13 +1073,19 @@
       break;
     }
 
+    case Instruction::DIV_LONG: {
+      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                      dex_offset, Primitive::kPrimLong, false);
+      break;
+    }
+
     case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat);
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_offset);
       break;
     }
 
     case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble);
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_offset);
       break;
     }
 
@@ -1142,13 +1180,19 @@
       break;
     }
 
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDiv(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                      dex_offset, Primitive::kPrimLong, false);
+      break;
+    }
+
     case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat);
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_offset);
       break;
     }
 
     case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble);
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_offset);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 9cf8305..b4497bf 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -102,9 +102,15 @@
   void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
   void Binop_22b(const Instruction& instruction, bool reverse);
 
   template<typename T>
@@ -119,7 +125,7 @@
 
   void BuildCheckedDiv(uint16_t out_reg,
                        uint16_t first_reg,
-                       int32_t second_reg,  // can be a constant
+                       int64_t second_reg_or_constant,
                        uint32_t dex_offset,
                        Primitive::Type type,
                        bool second_is_lit);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 87955eb..4d86732 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -41,7 +41,7 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
 static constexpr int kCurrentMethodStackOffset = 0;
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2 };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr SRegister kRuntimeParameterFpuRegisters[] = { };
@@ -1700,8 +1700,11 @@
 }
 
 void LocationsBuilderARM::VisitDiv(HDiv* div) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -1710,7 +1713,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R0,R2.
+      locations->SetOut(Location::RegisterPairLocation(R0, R2));
       break;
     }
     case Primitive::kPrimFloat:
@@ -1739,7 +1748,15 @@
     }
 
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc());
       break;
     }
 
@@ -1763,7 +1780,7 @@
 void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -1776,9 +1793,36 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  DCHECK(value.IsRegister()) << value;
-  __ cmp(value.As<Register>(), ShifterOperand(0));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ cmp(value.As<Register>(), ShifterOperand(0));
+        __ b(slow_path->GetEntryLabel(), EQ);
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ b(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (value.IsRegisterPair()) {
+        __ orrs(IP,
+                value.AsRegisterPairLow<Register>(),
+                ShifterOperand(value.AsRegisterPairHigh<Register>()));
+        __ b(slow_path->GetEntryLabel(), EQ);
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ b(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+    }
+  }
 }
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d7ce387..bd36399 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -36,7 +36,7 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
@@ -575,7 +575,7 @@
       __ movss(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     }
   } else {
-    DCHECK(destination.IsStackSlot());
+    DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>());
     } else if (source.IsFpuRegister()) {
@@ -636,7 +636,7 @@
       LOG(FATAL) << "Unimplemented";
     }
   } else {
-    DCHECK(destination.IsDoubleStackSlot());
+    DCHECK(destination.IsDoubleStackSlot()) << destination;
     if (source.IsRegisterPair()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
@@ -674,7 +674,7 @@
     }
   } else if (instruction->IsLongConstant()) {
     int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegister()) {
+    if (location.IsRegisterPair()) {
       __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
       __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
     } else if (location.IsDoubleStackSlot()) {
@@ -1666,8 +1666,11 @@
 }
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RegisterLocation(EAX));
@@ -1678,7 +1681,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // Runtime helper puts the result in EAX, EDX.
+      locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
       break;
     }
     case Primitive::kPrimFloat:
@@ -1696,12 +1705,13 @@
 
 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
   LocationSummary* locations = div->GetLocations();
+  Location out = locations->Out();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
 
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(out));
       Register first_reg = first.As<Register>();
       Register second_reg = second.As<Register>();
       DCHECK_EQ(EAX, first_reg);
@@ -1728,16 +1738,28 @@
     }
 
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
+
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLdiv)));
+      codegen_->RecordPcInfo(div, div->GetDexPc());
+
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(out));
       __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(out));
       __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
@@ -1750,7 +1772,21 @@
 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::Any());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::Any());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+      if (!instruction->IsConstant()) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -1763,18 +1799,39 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  if (value.IsRegister()) {
-    __ testl(value.As<Register>(), value.As<Register>());
-  } else if (value.IsStackSlot()) {
-    __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
-  } else {
-    DCHECK(value.IsConstant()) << value;
-    if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
-    __ jmp(slow_path->GetEntryLabel());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ testl(value.As<Register>(), value.As<Register>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsStackSlot()) {
+        __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+        __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
     }
-    return;
+    case Primitive::kPrimLong: {
+      if (value.IsRegisterPair()) {
+        Register temp = locations->GetTemp(0).As<Register>();
+        __ movl(temp, value.AsRegisterPairLow<Register>());
+        __ orl(temp, value.AsRegisterPairHigh<Register>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
   }
-  __ j(kEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3c7dd3f..82bb7db 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -108,16 +108,23 @@
 
 class DivMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  explicit DivMinusOneSlowPathX86_64(Register reg) : reg_(reg) {}
+  explicit DivMinusOneSlowPathX86_64(Register reg, Primitive::Type type)
+      : reg_(reg), type_(type) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ negl(CpuRegister(reg_));
+    if (type_ == Primitive::kPrimInt) {
+      __ negl(CpuRegister(reg_));
+    } else {
+      DCHECK_EQ(Primitive::kPrimLong, type_);
+      __ negq(CpuRegister(reg_));
+    }
     __ jmp(GetExitLabel());
   }
 
  private:
-  Register reg_;
+  const Register reg_;
+  const Primitive::Type type_;
   DISALLOW_COPY_AND_ASSIGN(DivMinusOneSlowPathX86_64);
 };
 
@@ -1611,7 +1618,8 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RegisterLocation(RAX));
       locations->SetInAt(1, Location::RequiresRegister());
       locations->SetOut(Location::SameAsFirstInput());
@@ -1619,10 +1627,7 @@
       locations->AddTemp(Location::RegisterLocation(RDX));
       break;
     }
-    case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
-      break;
-    }
+
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -1642,38 +1647,42 @@
   Location second = locations->InAt(1);
   DCHECK(first.Equals(locations->Out()));
 
-  switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
+  Primitive::Type type = div->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
       CpuRegister first_reg = first.As<CpuRegister>();
       CpuRegister second_reg = second.As<CpuRegister>();
       DCHECK_EQ(RAX,  first_reg.AsRegister());
       DCHECK_EQ(RDX, locations->GetTemp(0).As<CpuRegister>().AsRegister());
 
       SlowPathCodeX86_64* slow_path =
-          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86_64(first_reg.AsRegister());
+          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86_64(first_reg.AsRegister(), type);
       codegen_->AddSlowPath(slow_path);
 
-      // 0x80000000/-1 triggers an arithmetic exception!
-      // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
-      // it's safe to just use negl instead of more complex comparisons.
+      // 0x80000000(00000000)/-1 triggers an arithmetic exception!
+      // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
+      // so it's safe to just use negl instead of more complex comparisons.
 
       __ cmpl(second_reg, Immediate(-1));
       __ j(kEqual, slow_path->GetEntryLabel());
 
-      // edx:eax <- sign-extended of eax
-      __ cdq();
-      // eax = quotient, edx = remainder
-      __ idivl(second_reg);
+      if (type == Primitive::kPrimInt) {
+        // edx:eax <- sign-extended of eax
+        __ cdq();
+        // eax = quotient, edx = remainder
+        __ idivl(second_reg);
+      } else {
+        // rdx:rax <- sign-extended of rax
+        __ cqo();
+        // rax = quotient, rdx = remainder
+        __ idivq(second_reg);
+      }
 
       __ Bind(slow_path->GetExitLabel());
       break;
     }
 
-    case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
-      break;
-    }
-
     case Primitive::kPrimFloat: {
       __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
@@ -1706,18 +1715,40 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  if (value.IsRegister()) {
-    __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
-  } else if (value.IsStackSlot()) {
-    __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
-  } else {
-    DCHECK(value.IsConstant()) << value;
-    if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
-      __ jmp(slow_path->GetEntryLabel());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsStackSlot()) {
+        __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+        __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
     }
-    return;
+    case Primitive::kPrimLong: {
+      if (value.IsRegister()) {
+        __ testq(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsDoubleStackSlot()) {
+        __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+        __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   }
-  __ j(kEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 59fac52..5af3cdd 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1750,8 +1750,8 @@
 
 class HDiv : public HBinaryOperation {
  public:
-  HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right)
-      : HBinaryOperation(result_type, left, right) {}
+  HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc)
+      : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {}
 
   virtual int32_t Evaluate(int32_t x, int32_t y) const {
     // Our graph structure ensures we never have 0 for `y` during constant folding.
@@ -1761,9 +1761,13 @@
   }
   virtual int64_t Evaluate(int64_t x, int64_t y) const { return x / y; }
 
+  uint32_t GetDexPc() const { return dex_pc_; }
+
   DECLARE_INSTRUCTION(Div);
 
  private:
+  const uint32_t dex_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 3d81362..ba4be34 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -713,7 +713,7 @@
   graph->AddBlock(block);
   entry->AddSuccessor(block);
 
-  *div = new (allocator) HDiv(Primitive::kPrimInt, first, second);
+  *div = new (allocator) HDiv(Primitive::kPrimInt, first, second, 0);  // don't care about dex_pc.
   block->AddInstruction(*div);
 
   block->AddInstruction(new (allocator) HExit());
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 5d1c9af..bd08b1f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -909,6 +909,21 @@
 }
 
 
+void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(reg, address);
+  EmitUint8(0x39);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
+void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(address);
+  EmitComplex(7, address, imm);
+}
+
+
 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg0, reg1);
@@ -933,6 +948,14 @@
 }
 
 
+void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
+  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(address);
+  EmitComplex(7, address, imm);
+}
+
+
 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -949,21 +972,6 @@
 }
 
 
-void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(reg, address);
-  EmitUint8(0x39);
-  EmitOperand(reg.LowBits(), address);
-}
-
-
-void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(address);
-  EmitComplex(7, address, imm);
-}
-
-
 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg1, reg2);
@@ -998,6 +1006,14 @@
 }
 
 
+void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg1, reg2);
+  EmitUint8(0x85);
+  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
+}
+
+
 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg);
@@ -1267,6 +1283,13 @@
 }
 
 
+void X86_64Assembler::cqo() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64();
+  EmitUint8(0x99);
+}
+
+
 void X86_64Assembler::idivl(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
@@ -1275,6 +1298,14 @@
 }
 
 
+void X86_64Assembler::idivq(CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0xF7);
+  EmitUint8(0xF8 | reg.LowBits());
+}
+
+
 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1820,10 +1851,20 @@
   }
 }
 
+void X86_64Assembler::EmitRex64() {
+  EmitOptionalRex(false, true, false, false, false);
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister reg) {
   EmitOptionalRex(false, true, false, false, reg.NeedsRex());
 }
 
+void X86_64Assembler::EmitRex64(const Operand& operand) {
+  uint8_t rex = operand.rex();
+  rex |= 0x48;  // REX.W000
+  EmitUint8(rex);
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 285b4cf..b46f6f7 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -390,10 +390,12 @@
   void cmpq(CpuRegister reg0, CpuRegister reg1);
   void cmpq(CpuRegister reg0, const Immediate& imm);
   void cmpq(CpuRegister reg0, const Address& address);
+  void cmpq(const Address& address, const Immediate& imm);
 
   void testl(CpuRegister reg1, CpuRegister reg2);
   void testl(CpuRegister reg, const Immediate& imm);
 
+  void testq(CpuRegister reg1, CpuRegister reg2);
   void testq(CpuRegister reg, const Address& address);
 
   void andl(CpuRegister dst, const Immediate& imm);
@@ -432,8 +434,10 @@
   void subq(CpuRegister dst, const Address& address);
 
   void cdq();
+  void cqo();
 
   void idivl(CpuRegister reg);
+  void idivq(CpuRegister reg);
 
   void imull(CpuRegister dst, CpuRegister src);
   void imull(CpuRegister reg, const Immediate& imm);
@@ -669,7 +673,9 @@
   void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
 
   // Emit a REX.W prefix plus necessary register bit encodings.
+  void EmitRex64();
   void EmitRex64(CpuRegister reg);
+  void EmitRex64(const Operand& operand);
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
diff --git a/test/417-optimizing-arith-div/src/Main.java b/test/417-optimizing-arith-div/src/Main.java
index 5825d24..a5dea15 100644
--- a/test/417-optimizing-arith-div/src/Main.java
+++ b/test/417-optimizing-arith-div/src/Main.java
@@ -78,18 +78,33 @@
     } catch (java.lang.RuntimeException e) {
     }
   }
+
+  public static void expectDivisionByZero(long value) {
+    try {
+      $opt$Div(value, 0L);
+      throw new Error("Expected RuntimeException when dividing by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$DivZero(value);
+      throw new Error("Expected RuntimeException when dividing by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
   public static void main(String[] args) {
     div();
   }
 
   public static void div() {
     divInt();
+    divLong();
     divFloat();
     divDouble();
   }
 
   private static void divInt() {
-    expectEquals(2, $opt$DivLit(6));
+    expectEquals(2, $opt$DivConst(6));
     expectEquals(2, $opt$Div(6, 3));
     expectEquals(6, $opt$Div(6, 1));
     expectEquals(-2, $opt$Div(6, -3));
@@ -111,6 +126,35 @@
     expectDivisionByZero(Integer.MIN_VALUE);
   }
 
+  private static void divLong() {
+    expectEquals(2L, $opt$DivConst(6L));
+    expectEquals(2L, $opt$Div(6L, 3L));
+    expectEquals(6L, $opt$Div(6L, 1L));
+    expectEquals(-2L, $opt$Div(6L, -3L));
+    expectEquals(1L, $opt$Div(4L, 3L));
+    expectEquals(-1L, $opt$Div(4L, -3L));
+    expectEquals(5L, $opt$Div(23L, 4L));
+    expectEquals(-5L, $opt$Div(-23L, 4L));
+
+    expectEquals(-Integer.MAX_VALUE, $opt$Div(Integer.MAX_VALUE, -1L));
+    expectEquals(2147483648L, $opt$Div(Integer.MIN_VALUE, -1L));
+    expectEquals(-1073741824L, $opt$Div(Integer.MIN_VALUE, 2L));
+
+    expectEquals(-Long.MAX_VALUE, $opt$Div(Long.MAX_VALUE, -1L));
+    expectEquals(Long.MIN_VALUE, $opt$Div(Long.MIN_VALUE, -1L)); // overflow
+
+    expectEquals(11111111111111L, $opt$Div(33333333333333L, 3L));
+    expectEquals(3L, $opt$Div(33333333333333L, 11111111111111L));
+
+    expectEquals(0L, $opt$Div(0L, Long.MAX_VALUE));
+    expectEquals(0L, $opt$Div(0L, Long.MIN_VALUE));
+
+    expectDivisionByZero(0L);
+    expectDivisionByZero(1L);
+    expectDivisionByZero(Long.MAX_VALUE);
+    expectDivisionByZero(Long.MIN_VALUE);
+  }
+
   private static void divFloat() {
     expectApproxEquals(1.6666666F, $opt$Div(5F, 3F));
     expectApproxEquals(0F, $opt$Div(0F, 3F));
@@ -178,10 +222,22 @@
   }
 
   // Division by literals != 0 should not generate checks.
-  static int $opt$DivLit(int a) {
+  static int $opt$DivConst(int a) {
     return a / 3;
   }
 
+  static long $opt$DivConst(long a) {
+    return a / 3L;
+  }
+
+  static long $opt$Div(long a, long b) {
+    return a / b;
+  }
+
+  static long $opt$DivZero(long a) {
+    return a / 0L;
+  }
+
   static float $opt$Div(float a, float b) {
     return a / b;
   }