Merge "Quick: Clean up temp use counting."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index eaea031..7611f50 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -138,7 +138,6 @@
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
 	utils/assembler.cc \
-	utils/dwarf_cfi.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 	utils/mips64/assembler_mips64.cc \
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index c51046e..232a228 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1156,14 +1156,6 @@
     return lhs.LiteralOffset() < rhs.LiteralOffset();
   });
 
-  std::unique_ptr<std::vector<uint8_t>> cfi_info(
-      cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ?
-          ReturnFrameDescriptionEntry() :
-          nullptr);
-  ArrayRef<const uint8_t> cfi_ref;
-  if (cfi_info.get() != nullptr) {
-    cfi_ref = ArrayRef<const uint8_t>(*cfi_info);
-  }
   return CompiledMethod::SwapAllocCompiledMethod(
       cu_->compiler_driver, cu_->instruction_set,
       ArrayRef<const uint8_t>(code_buffer_),
@@ -1172,7 +1164,7 @@
       ArrayRef<const uint8_t>(encoded_mapping_table_),
       ArrayRef<const uint8_t>(vmap_encoder.GetData()),
       ArrayRef<const uint8_t>(native_gc_map_),
-      cfi_ref,
+      ArrayRef<const uint8_t>(),
       ArrayRef<const LinkerPatch>(patches_));
 }
 
@@ -1334,11 +1326,6 @@
   UNREACHABLE();
 }
 
-std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
-  // Default case is to do nothing.
-  return nullptr;
-}
-
 RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) {
   if (loc.location == kLocPhysReg) {
     DCHECK(!loc.reg.Is32Bit());
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 45a5855..1624c84 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1573,11 +1573,6 @@
                                     bool can_assume_type_is_in_dex_cache,
                                     uint32_t type_idx, RegLocation rl_dest,
                                     RegLocation rl_src);
-    /*
-     * @brief Generate the eh_frame FDE information if possible.
-     * @returns pointer to vector containg FDE information, or NULL.
-     */
-    virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry();
 
     /**
      * @brief Used to insert marker that can be used to associate MIR with LIR.
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index fd23692..18fae17 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -184,8 +184,7 @@
   }
 
   /* Build frame, return address already on stack */
-  stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ -
-                              GetInstructionSetPointerSize(cu_->instruction_set));
+  OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
 
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -263,8 +262,8 @@
   UnSpillFPRegs();
   /* Remove frame except for return address */
   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  stack_increment_ = OpRegImm(kOpAdd, rs_rSP,
-                              frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
+  int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set);
+  OpRegImm(kOpAdd, rs_rSP, adjust);
   NewLIR0(kX86Ret);
 }
 
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 758684e..a98a99e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -380,12 +380,6 @@
    */
   void InstallLiteralPools() OVERRIDE;
 
-  /*
-   * @brief Generate the debug_frame FDE information.
-   * @returns pointer to vector containing CFE information
-   */
-  std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE;
-
   LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
  protected:
@@ -958,12 +952,6 @@
   // Instructions needing patching with PC relative code addresses.
   ArenaVector<LIR*> dex_cache_access_insns_;
 
-  // Prologue decrement of stack pointer.
-  LIR* stack_decrement_;
-
-  // Epilogue increment of stack pointer.
-  LIR* stack_increment_;
-
   // The list of const vector literals.
   LIR* const_vectors_;
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index cad82a1..081f80f 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -32,7 +32,6 @@
 #include "mirror/string.h"
 #include "oat.h"
 #include "x86_lir.h"
-#include "utils/dwarf_cfi.h"
 
 namespace art {
 
@@ -830,7 +829,6 @@
       class_type_address_insns_(arena->Adapter()),
       call_method_insns_(arena->Adapter()),
       dex_cache_access_insns_(arena->Adapter()),
-      stack_decrement_(nullptr), stack_increment_(nullptr),
       const_vectors_(nullptr) {
   method_address_insns_.reserve(100);
   class_type_address_insns_.reserve(100);
@@ -1426,100 +1424,6 @@
   return true;
 }
 
-static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) {
-  if (is_x86_64) {
-    switch (art_reg_id) {
-    case 3 : *dwarf_reg_id =  3; return true;  // %rbx
-    // This is the only discrepancy between ART & DWARF register numbering.
-    case 5 : *dwarf_reg_id =  6; return true;  // %rbp
-    case 12: *dwarf_reg_id = 12; return true;  // %r12
-    case 13: *dwarf_reg_id = 13; return true;  // %r13
-    case 14: *dwarf_reg_id = 14; return true;  // %r14
-    case 15: *dwarf_reg_id = 15; return true;  // %r15
-    default: return false;  // Should not get here
-    }
-  } else {
-    switch (art_reg_id) {
-    case 5: *dwarf_reg_id = 5; return true;  // %ebp
-    case 6: *dwarf_reg_id = 6; return true;  // %esi
-    case 7: *dwarf_reg_id = 7; return true;  // %edi
-    default: return false;  // Should not get here
-    }
-  }
-}
-
-std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() {
-  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
-
-  // Generate the FDE for the method.
-  DCHECK_NE(data_offset_, 0U);
-
-  WriteFDEHeader(cfi_info, cu_->target64);
-  WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64);
-
-  // The instructions in the FDE.
-  if (stack_decrement_ != nullptr) {
-    // Advance LOC to just past the stack decrement.
-    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
-    DW_CFA_advance_loc(cfi_info, pc);
-
-    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
-    DW_CFA_def_cfa_offset(cfi_info, frame_size_);
-
-    // Handle register spills
-    const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4;
-    const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4;
-    uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-    int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
-    for (int reg = 0; mask; mask >>= 1, reg++) {
-      if (mask & 0x1) {
-        pc += kSpillInstLen;
-
-        // Advance LOC to pass this instruction
-        DW_CFA_advance_loc(cfi_info, kSpillInstLen);
-
-        int dwarf_reg_id;
-        if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) {
-          // DW_CFA_offset_extended_sf reg offset
-          DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor);
-        }
-
-        offset += GetInstructionSetPointerSize(cu_->instruction_set);
-      }
-    }
-
-    // We continue with that stack until the epilogue.
-    if (stack_increment_ != nullptr) {
-      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
-      DW_CFA_advance_loc(cfi_info, new_pc - pc);
-
-      // We probably have code snippets after the epilogue, so save the
-      // current state: DW_CFA_remember_state.
-      DW_CFA_remember_state(cfi_info);
-
-      // We have now popped the stack: DW_CFA_def_cfa_offset 4/8.
-      // There is only the return PC on the stack now.
-      DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set));
-
-      // Everything after that is the same as before the epilogue.
-      // Stack bump was followed by RET instruction.
-      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
-      if (post_ret_insn != nullptr) {
-        pc = new_pc;
-        new_pc = post_ret_insn->offset;
-        DW_CFA_advance_loc(cfi_info, new_pc - pc);
-        // Restore the state: DW_CFA_restore_state.
-        DW_CFA_restore_state(cfi_info);
-      }
-    }
-  }
-
-  PadCFI(cfi_info);
-  WriteCFILength(cfi_info, cu_->target64);
-
-  return cfi_info;
-}
-
 void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
   switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
     case kMirOpReserveVectorRegisters:
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 24cb364..1bd83b6 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -89,116 +89,6 @@
   return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
 }
 
-std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
-  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
-
-  // Length (will be filled in later in this routine).
-  if (is_x86_64) {
-    Push32(cfi_info, 0xffffffff);  // Indicates 64bit
-    Push32(cfi_info, 0);
-    Push32(cfi_info, 0);
-  } else {
-    Push32(cfi_info, 0);
-  }
-
-  // CIE id: always 0.
-  if (is_x86_64) {
-    Push32(cfi_info, 0);
-    Push32(cfi_info, 0);
-  } else {
-    Push32(cfi_info, 0);
-  }
-
-  // Version: always 1.
-  cfi_info->push_back(0x01);
-
-  // Augmentation: 'zR\0'
-  cfi_info->push_back(0x7a);
-  cfi_info->push_back(0x52);
-  cfi_info->push_back(0x0);
-
-  // Code alignment: 1.
-  EncodeUnsignedLeb128(1, cfi_info);
-
-  // Data alignment.
-  if (is_x86_64) {
-    EncodeSignedLeb128(-8, cfi_info);
-  } else {
-    EncodeSignedLeb128(-4, cfi_info);
-  }
-
-  // Return address register.
-  if (is_x86_64) {
-    // R16(RIP)
-    cfi_info->push_back(0x10);
-  } else {
-    // R8(EIP)
-    cfi_info->push_back(0x08);
-  }
-
-  // Augmentation length: 1.
-  cfi_info->push_back(1);
-
-  // Augmentation data.
-  if (is_x86_64) {
-    // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8).
-    cfi_info->push_back(0x04);
-  } else {
-    // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
-    cfi_info->push_back(0x03);
-  }
-
-  // Initial instructions.
-  if (is_x86_64) {
-    // DW_CFA_def_cfa R7(RSP) 8.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x07);
-    cfi_info->push_back(0x08);
-
-    // DW_CFA_offset R16(RIP) 1 (* -8).
-    cfi_info->push_back(0x90);
-    cfi_info->push_back(0x01);
-  } else {
-    // DW_CFA_def_cfa R4(ESP) 4.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x04);
-    cfi_info->push_back(0x04);
-
-    // DW_CFA_offset R8(EIP) 1 (* -4).
-    cfi_info->push_back(0x88);
-    cfi_info->push_back(0x01);
-  }
-
-  // Padding to a multiple of 4
-  while ((cfi_info->size() & 3) != 0) {
-    // DW_CFA_nop is encoded as 0.
-    cfi_info->push_back(0);
-  }
-
-  // Set the length of the CIE inside the generated bytes.
-  if (is_x86_64) {
-    uint32_t length = cfi_info->size() - 12;
-    UpdateWord(cfi_info, 4, length);
-  } else {
-    uint32_t length = cfi_info->size() - 4;
-    UpdateWord(cfi_info, 0, length);
-  }
-  return cfi_info;
-}
-
-std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) {
-  switch (isa) {
-    case kX86:
-      return ConstructCIEFrameX86(false);
-    case kX86_64:
-      return ConstructCIEFrameX86(true);
-
-    default:
-      // Not implemented.
-      return nullptr;
-  }
-}
-
 class OatWriterWrapper FINAL : public CodeOutput {
  public:
   explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
@@ -621,9 +511,7 @@
                               ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
                                          Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder,
                               OatWriter* oat_writer) {
-  std::unique_ptr<std::vector<uint8_t>> cfi_info(
-      ConstructCIEFrame(compiler_driver->GetInstructionSet()));
-
+  UNUSED(compiler_driver);
   Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr;
 
   // Iterate over the compiled methods.
@@ -643,63 +531,8 @@
       symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true,
                         0, STB_LOCAL, STT_NOTYPE);
     }
-
-    // Include CFI for compiled method, if possible.
-    if (cfi_info.get() != nullptr) {
-      DCHECK(it->compiled_method_ != nullptr);
-
-      // Copy in the FDE, if present
-      const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo();
-      if (fde != nullptr) {
-        // Copy the information into cfi_info and then fix the address in the new copy.
-        int cur_offset = cfi_info->size();
-        cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
-
-        bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff;
-
-        // Set the 'CIE_pointer' field.
-        uint64_t CIE_pointer = cur_offset + (is_64bit ? 12 : 4);
-        uint64_t offset_to_update = CIE_pointer;
-        if (is_64bit) {
-          (*cfi_info)[offset_to_update+0] = CIE_pointer;
-          (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
-          (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
-          (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
-          (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32;
-          (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40;
-          (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48;
-          (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56;
-        } else {
-          (*cfi_info)[offset_to_update+0] = CIE_pointer;
-          (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
-          (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
-          (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
-        }
-
-        // Set the 'initial_location' field.
-        offset_to_update += is_64bit ? 8 : 4;
-        if (is_64bit) {
-          const uint64_t quick_code_start = it->low_pc_ + text_section_address;
-          (*cfi_info)[offset_to_update+0] = quick_code_start;
-          (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
-          (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
-          (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
-          (*cfi_info)[offset_to_update+4] = quick_code_start >> 32;
-          (*cfi_info)[offset_to_update+5] = quick_code_start >> 40;
-          (*cfi_info)[offset_to_update+6] = quick_code_start >> 48;
-          (*cfi_info)[offset_to_update+7] = quick_code_start >> 56;
-        } else {
-          const uint32_t quick_code_start = it->low_pc_ + text_section_address;
-          (*cfi_info)[offset_to_update+0] = quick_code_start;
-          (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
-          (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
-          (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
-        }
-      }
-    }
   }
 
-  bool hasCFI = (cfi_info.get() != nullptr);
   bool hasLineInfo = false;
   for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) {
     if (dbg_info.dbgstream_ != nullptr &&
@@ -709,7 +542,7 @@
     }
   }
 
-  if (hasLineInfo || hasCFI) {
+  if (hasLineInfo) {
     ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info",
                                                                    SHT_PROGBITS,
                                                                    0, nullptr, 0, 1, 0);
@@ -731,15 +564,6 @@
     builder->RegisterRawSection(debug_info);
     builder->RegisterRawSection(debug_abbrev);
 
-    if (hasCFI) {
-      ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame",
-                                                                   SHT_PROGBITS,
-                                                                   SHF_ALLOC,
-                                                                   nullptr, 0, 4, 0);
-      eh_frame.SetBuffer(std::move(*cfi_info.get()));
-      builder->RegisterRawSection(eh_frame);
-    }
-
     if (hasLineInfo) {
       builder->RegisterRawSection(debug_line);
     }
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 2d9e03a..45e2fd0 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -93,7 +93,6 @@
 
   // Assembler that holds generated instructions
   std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
-  jni_asm->InitializeFrameDescriptionEntry();
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
@@ -432,19 +431,14 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
-  jni_asm->FinalizeFrameDescriptionEntry();
-  std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry());
-  ArrayRef<const uint8_t> cfi_ref;
-  if (fde != nullptr) {
-    cfi_ref = ArrayRef<const uint8_t>(*fde);
-  }
+
   return CompiledMethod::SwapAllocCompiledMethodCFI(driver,
                                                     instruction_set,
                                                     ArrayRef<const uint8_t>(managed_code),
                                                     frame_size,
                                                     main_jni_conv->CoreSpillMask(),
                                                     main_jni_conv->FpSpillMask(),
-                                                    cfi_ref);
+                                                    ArrayRef<const uint8_t>());
 }
 
 // Copy a single parameter from the managed to the JNI calling convention
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index b6e4510..aec2d19 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -320,6 +320,27 @@
   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
 }
 
+void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location input = locations->InAt(0);
+  Register input_lo = input.AsRegisterPairLow<Register>();
+  Register input_hi = input.AsRegisterPairHigh<Register>();
+  Location output = locations->Out();
+  Register output_lo = output.AsRegisterPairLow<Register>();
+  Register output_hi = output.AsRegisterPairHigh<Register>();
+
+  X86Assembler* assembler = GetAssembler();
+  // Assign the inputs to the outputs, mixing low/high.
+  __ movl(output_lo, input_hi);
+  __ movl(output_hi, input_lo);
+  __ bswapl(output_lo);
+  __ bswapl(output_hi);
+}
+
 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
 }
@@ -1330,6 +1351,181 @@
   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
 }
 
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+                                       HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Offset is a long, but in 32 bit mode, we only need the low word.
+  // Can we update the invoke here to remove a TypeConvert to Long?
+  locations->SetInAt(2, Location::RequiresRegister());
+  // Expected value must be in EAX or EDX:EAX.
+  // For long, new value must be in ECX:EBX.
+  if (type == Primitive::kPrimLong) {
+    locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
+    locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
+  } else {
+    locations->SetInAt(3, Location::RegisterLocation(EAX));
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+
+  // Force a byte register for the output.
+  locations->SetOut(Location::RegisterLocation(EAX));
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    locations->AddTemp(Location::RequiresRegister());
+    // Need a byte register for marking.
+    locations->AddTemp(Location::RegisterLocation(ECX));
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
+  X86Assembler* assembler =
+    reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register base = locations->InAt(1).AsRegister<Register>();
+  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location out = locations->Out();
+  DCHECK_EQ(out.AsRegister<Register>(), EAX);
+
+  if (type == Primitive::kPrimLong) {
+    DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
+    DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
+    DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
+    DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
+    __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+  } else {
+    // Integer or object.
+    DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+    Register value = locations->InAt(4).AsRegister<Register>();
+    if (type == Primitive::kPrimNot) {
+      // Mark card for object assuming new value is stored.
+      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
+                          locations->GetTemp(1).AsRegister<Register>(),
+                          base,
+                          value);
+    }
+
+    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+  }
+
+  // locked cmpxchg has full barrier semantics, and we don't need scheduling
+  // barriers at this time.
+
+  // Convert ZF into the boolean result.
+  __ setb(kZero, out.AsRegister<Register>());
+  __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
+                     X86Assembler* assembler) {
+  Immediate imm_shift(shift);
+  Immediate imm_mask(mask);
+  __ movl(temp, reg);
+  __ shrl(reg, imm_shift);
+  __ andl(temp, imm_mask);
+  __ andl(reg, imm_mask);
+  __ shll(temp, imm_shift);
+  __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
+  X86Assembler* assembler =
+    reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register reg = locations->InAt(0).AsRegister<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+  /*
+   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
+   * swapping bits to reverse bits in a number x. Using bswap to save instructions
+   * compared to generic luni implementation which has 5 rounds of swapping bits.
+   * x = bswap x
+   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+   */
+  __ bswapl(reg);
+  SwapBits(reg, temp, 1, 0x55555555, assembler);
+  SwapBits(reg, temp, 2, 0x33333333, assembler);
+  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
+  X86Assembler* assembler =
+    reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+  // We want to swap high/low, then bswap each one, and then do the same
+  // as a 32 bit reverse.
+  // Exchange high and low.
+  __ movl(temp, reg_low);
+  __ movl(reg_low, reg_high);
+  __ movl(reg_high, temp);
+
+  // bit-reverse low
+  __ bswapl(reg_low);
+  SwapBits(reg_low, temp, 1, 0x55555555, assembler);
+  SwapBits(reg_low, temp, 2, 0x33333333, assembler);
+  SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
+
+  // bit-reverse high
+  __ bswapl(reg_high);
+  SwapBits(reg_high, temp, 1, 0x55555555, assembler);
+  SwapBits(reg_high, temp, 2, 0x33333333, assembler);
+  SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1338,16 +1534,10 @@
 void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
 }  // namespace x86
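
Note: the X86 reverse intrinsics above follow the scheme spelled out in their comments: reverse byte order with bswap, then reverse the bits within each byte in three mask-and-shift rounds (LongReverse additionally exchanges the register-pair halves first, and LongReverseBytes only swaps and bswaps the halves). A value-level C++ sketch of the 32-bit transformation, for illustration only (ReverseBits32 is a hypothetical helper, not part of this change):

    #include <cstdint>

    // Models the instruction sequence VisitIntegerReverse emits:
    // bswapl, then SwapBits with shifts 1/2/4 and masks 0x55../0x33../0x0f..
    static uint32_t ReverseBits32(uint32_t x) {
      x = __builtin_bswap32(x);                                  // bswapl reg
      x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);   // SwapBits(reg, temp, 1, 0x55555555)
      x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);   // SwapBits(reg, temp, 2, 0x33333333)
      x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);   // SwapBits(reg, temp, 4, 0x0f0f0f0f)
      return x;
    }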
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index f6fa013..5122a00 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1202,6 +1202,175 @@
   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
 }
 
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+                                       HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  // expected value must be in EAX/RAX.
+  locations->SetInAt(3, Location::RegisterLocation(RAX));
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler =
+    reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
+  DCHECK_EQ(expected.AsRegister(), RAX);
+  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
+  } else {
+    // Integer or object.
+    if (type == Primitive::kPrimNot) {
+      // Mark card for object assuming new value is stored.
+      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+                          locations->GetTemp(1).AsRegister<CpuRegister>(),
+                          base,
+                          value);
+    }
+
+    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+  }
+
+  // locked cmpxchg has full barrier semantics, and we don't need scheduling
+  // barriers at this time.
+
+  // Convert ZF into the boolean result.
+  __ setcc(kZero, out);
+  __ movzxb(out, out);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
+                     X86_64Assembler* assembler) {
+  Immediate imm_shift(shift);
+  Immediate imm_mask(mask);
+  __ movl(temp, reg);
+  __ shrl(reg, imm_shift);
+  __ andl(temp, imm_mask);
+  __ andl(reg, imm_mask);
+  __ shll(temp, imm_shift);
+  __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
+  X86_64Assembler* assembler =
+    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  /*
+   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
+   * swapping bits to reverse bits in a number x. Using bswap to save instructions
+   * compared to generic luni implementation which has 5 rounds of swapping bits.
+   * x = bswap x
+   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+   */
+  __ bswapl(reg);
+  SwapBits(reg, temp, 1, 0x55555555, assembler);
+  SwapBits(reg, temp, 2, 0x33333333, assembler);
+  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
+                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
+  Immediate imm_shift(shift);
+  __ movq(temp_mask, Immediate(mask));
+  __ movq(temp, reg);
+  __ shrq(reg, imm_shift);
+  __ andq(temp, temp_mask);
+  __ andq(reg, temp_mask);
+  __ shlq(temp, imm_shift);
+  __ orq(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
+  X86_64Assembler* assembler =
+    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+  /*
+   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
+   * swapping bits to reverse bits in a long number x. Using bswap to save instructions
+   * compared to generic luni implementation which has 5 rounds of swapping bits.
+   * x = bswap x
+   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+   */
+  __ bswapq(reg);
+  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
+  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
+  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1210,14 +1379,9 @@
 void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
 }  // namespace x86_64
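
Note: the UnsafeCAS* intrinsics in both files lower Unsafe.compareAndSwap{Int,Long,Object} to a single lock cmpxchg (lock cmpxchg8b for long on 32-bit x86, lock cmpxchgq on x86-64) and convert the resulting ZF into the boolean return value with setb/setcc plus movzxb. A minimal C++ model of that contract, assuming the GCC/Clang __atomic builtin (CasModel is a hypothetical helper, not part of this change):

    #include <cstdint>

    // Atomically replaces *addr with desired iff it still equals expected and
    // reports success as 0/1, mirroring the ZF -> setcc -> movzxb sequence.
    static int32_t CasModel(int32_t* addr, int32_t expected, int32_t desired) {
      bool ok = __atomic_compare_exchange_n(addr, &expected, desired,
                                            /*weak=*/false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
      return ok ? 1 : 0;  // lock cmpxchg is a full barrier, matching the comments above
    }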
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 923ecdb..323f93c 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -504,12 +504,6 @@
   // and branch to a ExceptionSlowPath if it is.
   virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
-  virtual void InitializeFrameDescriptionEntry() {}
-  virtual void FinalizeFrameDescriptionEntry() {}
-  // Give a vector containing FDE data, or null if not used. Note: the assembler must take care
-  // of handling the lifecycle.
-  virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; }
-
   virtual ~Assembler() {}
 
  protected:
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
deleted file mode 100644
index a7e09c6..0000000
--- a/compiler/utils/dwarf_cfi.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "leb128.h"
-#include "utils.h"
-
-#include "dwarf_cfi.h"
-
-namespace art {
-
-void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) {
-  if (increment < 64) {
-    // Encoding in opcode.
-    buf->push_back(0x1 << 6 | increment);
-  } else if (increment < 256) {
-    // Single byte delta.
-    buf->push_back(0x02);
-    buf->push_back(increment);
-  } else if (increment < 256 * 256) {
-    // Two byte delta.
-    buf->push_back(0x03);
-    buf->push_back(increment & 0xff);
-    buf->push_back((increment >> 8) & 0xff);
-  } else {
-    // Four byte delta.
-    buf->push_back(0x04);
-    Push32(buf, increment);
-  }
-}
-
-void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) {
-  buf->push_back(0x11);
-  EncodeUnsignedLeb128(reg, buf);
-  EncodeSignedLeb128(offset, buf);
-}
-
-void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) {
-  buf->push_back((0x2 << 6) | reg);
-  EncodeUnsignedLeb128(offset, buf);
-}
-
-void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) {
-  buf->push_back(0x0e);
-  EncodeUnsignedLeb128(offset, buf);
-}
-
-void DW_CFA_remember_state(std::vector<uint8_t>* buf) {
-  buf->push_back(0x0a);
-}
-
-void DW_CFA_restore_state(std::vector<uint8_t>* buf) {
-  buf->push_back(0x0b);
-}
-
-void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) {
-  // 'length' (filled in by other functions).
-  if (is_64bit) {
-    Push32(buf, 0xffffffff);  // Indicates 64bit
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // 'CIE_pointer' (filled in by linker).
-  if (is_64bit) {
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // 'initial_location' (filled in by linker).
-  if (is_64bit) {
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // 'address_range' (filled in by other functions).
-  if (is_64bit) {
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // Augmentation length: 0
-  buf->push_back(0);
-}
-
-void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) {
-  const size_t kOffsetOfAddressRange = is_64bit? 28 : 12;
-  CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4));
-
-  uint8_t *p = buf->data() + kOffsetOfAddressRange;
-  if (is_64bit) {
-    p[0] = data;
-    p[1] = data >> 8;
-    p[2] = data >> 16;
-    p[3] = data >> 24;
-    p[4] = data >> 32;
-    p[5] = data >> 40;
-    p[6] = data >> 48;
-    p[7] = data >> 56;
-  } else {
-    p[0] = data;
-    p[1] = data >> 8;
-    p[2] = data >> 16;
-    p[3] = data >> 24;
-  }
-}
-
-void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) {
-  uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4;
-  DCHECK_EQ((length & 0x3), 0U);
-
-  uint8_t *p = is_64bit? buf->data() + 4 : buf->data();
-  if (is_64bit) {
-    p[0] = length;
-    p[1] = length >> 8;
-    p[2] = length >> 16;
-    p[3] = length >> 24;
-    p[4] = length >> 32;
-    p[5] = length >> 40;
-    p[6] = length >> 48;
-    p[7] = length >> 56;
-  } else {
-    p[0] = length;
-    p[1] = length >> 8;
-    p[2] = length >> 16;
-    p[3] = length >> 24;
-  }
-}
-
-void PadCFI(std::vector<uint8_t>* buf) {
-  while (buf->size() & 0x3) {
-    buf->push_back(0);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h
deleted file mode 100644
index 0c8b151..0000000
--- a/compiler/utils/dwarf_cfi.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_
-#define ART_COMPILER_UTILS_DWARF_CFI_H_
-
-#include <vector>
-
-namespace art {
-
-/**
- * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer
- * @param buf FDE buffer.
- * @param increment Amount by which to increase the current location.
- */
-void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment);
-
-/**
- * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer
- * @param buf FDE buffer.
- * @param reg Register number.
- * @param offset Offset of register address from CFA.
- */
-void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_offset' into an FDE buffer
- * @param buf FDE buffer.
- * @param reg Register number.
- * @param offset Offset of register address from CFA.
- */
-void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer
- * @param buf FDE buffer.
- * @param offset New offset of CFA.
- */
-void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer
- * @param buf FDE buffer.
- */
-void DW_CFA_remember_state(std::vector<uint8_t>* buf);
-
-/**
- * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer
- * @param buf FDE buffer.
- */
-void DW_CFA_restore_state(std::vector<uint8_t>* buf);
-
-/**
- * @brief Write FDE header into an FDE buffer
- * @param buf FDE buffer.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit);
-
-/**
- * @brief Set 'address_range' field of an FDE buffer
- * @param buf FDE buffer.
- * @param data Data value.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit);
-
-/**
- * @brief Set 'length' field of an FDE buffer
- * @param buf FDE buffer.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit);
-
-/**
- * @brief Pad an FDE buffer with 0 until its size is a multiple of 4
- * @param buf FDE buffer.
- */
-void PadCFI(std::vector<uint8_t>* buf);
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_DWARF_CFI_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b3a1376..4cca529 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -20,7 +20,6 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "memory_region.h"
 #include "thread.h"
-#include "utils/dwarf_cfi.h"
 
 namespace art {
 namespace x86 {
@@ -1467,6 +1466,15 @@
   EmitOperand(reg, address);
 }
 
+
+void X86Assembler::cmpxchg8b(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xC7);
+  EmitOperand(1, address);
+}
+
+
 void X86Assembler::mfence() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1631,69 +1639,32 @@
   EmitOperand(reg_or_opcode, Operand(operand));
 }
 
-void X86Assembler::InitializeFrameDescriptionEntry() {
-  WriteFDEHeader(&cfi_info_, false /* is_64bit */);
-}
-
-void X86Assembler::FinalizeFrameDescriptionEntry() {
-  WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */);
-  PadCFI(&cfi_info_);
-  WriteCFILength(&cfi_info_, false /* is_64bit */);
-}
-
 constexpr size_t kFramePointerSize = 4;
 
 void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& spill_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
-  cfi_cfa_offset_ = kFramePointerSize;  // Only return address on stack
-  cfi_pc_ = buffer_.Size();  // Nothing emitted yet
-  DCHECK_EQ(cfi_pc_, 0U);
-
-  uint32_t reg_offset = 1;
+  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
-    DCHECK(spill.IsCpuRegister());
-    pushl(spill.AsCpuRegister());
+    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    pushl(spill);
     gpr_count++;
-
-    // DW_CFA_advance_loc
-    DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-    cfi_pc_ = buffer_.Size();
-    // DW_CFA_def_cfa_offset
-    cfi_cfa_offset_ += kFramePointerSize;
-    DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-    // DW_CFA_offset reg offset
-    reg_offset++;
-    DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset);
   }
 
-  // return address then method on stack
+  // return address then method on stack.
   int32_t adjust = frame_size - (gpr_count * kFramePointerSize) -
                    sizeof(StackReference<mirror::ArtMethod>) /*method*/ -
                    kFramePointerSize /*return address*/;
   addl(ESP, Immediate(-adjust));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += adjust;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-
   pushl(method_reg.AsX86().AsCpuRegister());
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += kFramePointerSize;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     ManagedRegisterSpill spill = entry_spills.at(i);
     if (spill.AsX86().IsCpuRegister()) {
-      movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister());
+      int offset = frame_size + spill.getSpillOffset();
+      movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
     } else {
       DCHECK(spill.AsX86().IsXmmRegister());
       if (spill.getSize() == 8) {
@@ -1709,8 +1680,9 @@
 void X86Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) -
-                      sizeof(StackReference<mirror::ArtMethod>)));
+  int adjust = frame_size - (spill_regs.size() * kFramePointerSize) -
+               sizeof(StackReference<mirror::ArtMethod>);
+  addl(ESP, Immediate(adjust));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
     DCHECK(spill.IsCpuRegister());
@@ -1722,12 +1694,6 @@
 void X86Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addl(ESP, Immediate(-adjust));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += adjust;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 }
 
 void X86Assembler::DecreaseFrameSize(size_t adjust) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index bdf8843..f3675ae 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -205,7 +205,7 @@
 
 class X86Assembler FINAL : public Assembler {
  public:
-  explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
+  explicit X86Assembler() {}
   virtual ~X86Assembler() {}
 
   /*
@@ -457,6 +457,7 @@
 
   X86Assembler* lock();
   void cmpxchgl(const Address& address, Register reg);
+  void cmpxchg8b(const Address& address);
 
   void mfence();
 
@@ -476,6 +477,10 @@
     lock()->cmpxchgl(address, reg);
   }
 
+  void LockCmpxchg8b(const Address& address) {
+    lock()->cmpxchg8b(address);
+  }
+
   //
   // Misc. functionality
   //
@@ -599,12 +604,6 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
-  void InitializeFrameDescriptionEntry() OVERRIDE;
-  void FinalizeFrameDescriptionEntry() OVERRIDE;
-  std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
-    return &cfi_info_;
-  }
-
  private:
   inline void EmitUint8(uint8_t value);
   inline void EmitInt32(int32_t value);
@@ -623,9 +622,6 @@
   void EmitGenericShift(int rm, Register reg, const Immediate& imm);
   void EmitGenericShift(int rm, Register operand, Register shifter);
 
-  std::vector<uint8_t> cfi_info_;
-  uint32_t cfi_cfa_offset_, cfi_pc_;
-
   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
 };
 
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index fccb510..dba3b6b 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -127,4 +127,49 @@
   DriverStr(expected, "LoadLongConstant");
 }
 
+TEST_F(AssemblerX86Test, LockCmpxchgl) {
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+      x86::Register(x86::ESI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+      x86::Register(x86::ESI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+      x86::Register(x86::EDI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+      x86::Register(x86::EBP), 0), x86::Register(x86::ESI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0),
+      x86::Register(x86::ESI));
+  const char* expected =
+    "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n"
+    "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchgl %ESI, (%EBP)\n"
+    "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n";
+
+  DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86Test, LockCmpxchg8b) {
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+  GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0));
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0));
+  const char* expected =
+    "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n"
+    "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchg8b (%EBP)\n"
+    "lock cmpxchg8b (%EBP,%ESI,1)\n";
+
+  DriverStr(expected, "lock_cmpxchg8b");
+}
+
 }  // namespace art
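
Note: lock cmpxchg8b (opcode 0F C7 /1, emitted above) compares EDX:EAX against the 8-byte memory operand and, on match, stores ECX:EBX into it, which is why VisitUnsafeCASLong on 32-bit x86 pins the expected value to EAX/EDX and the new value to EBX/ECX. A hypothetical usage sketch of the new emitter, assuming the headers touched in this change (EmitLongCas is not part of the patch):

    // Emits the 8-byte CAS; callers place the expected value in EDX:EAX and the
    // new value in ECX:EBX beforehand, then read ZF (e.g. via setb/movzxb).
    void EmitLongCas(x86::X86Assembler* assembler, x86::Register base, x86::Register offset) {
      assembler->LockCmpxchg8b(x86::Address(base, offset, x86::TIMES_1, 0));
    }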
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 5d46ee2..09d2b49 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -88,14 +88,6 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
-  int DWARFRegId() const {
-    CHECK(IsCpuRegister());
-    // For all the X86 registers we care about:
-    // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
-    // DWARF register id is the same as id_.
-    return static_cast<int>(id_);
-  }
-
   ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index e82d90c..2e0d9e1 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -20,7 +20,6 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "memory_region.h"
 #include "thread.h"
-#include "utils/dwarf_cfi.h"
 
 namespace art {
 namespace x86_64 {
@@ -1854,11 +1853,22 @@
 
 void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(reg, address);
   EmitUint8(0x0F);
   EmitUint8(0xB1);
   EmitOperand(reg.LowBits(), address);
 }
 
+
+void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg, address);
+  EmitUint8(0x0F);
+  EmitUint8(0xB1);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
 void X86_64Assembler::mfence() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -2168,26 +2178,12 @@
   }
 }
 
-void X86_64Assembler::InitializeFrameDescriptionEntry() {
-  WriteFDEHeader(&cfi_info_, true /* is_64bit */);
-}
-
-void X86_64Assembler::FinalizeFrameDescriptionEntry() {
-  WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */);
-  PadCFI(&cfi_info_);
-  WriteCFILength(&cfi_info_, true /* is_64bit */);
-}
-
 constexpr size_t kFramePointerSize = 8;
 
 void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                                  const std::vector<ManagedRegister>& spill_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
-  cfi_cfa_offset_ = kFramePointerSize;  // Only return address on stack
-  cfi_pc_ = buffer_.Size();  // Nothing emitted yet
-  DCHECK_EQ(cfi_pc_, 0U);
-
-  uint32_t reg_offset = 1;
+  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
@@ -2195,29 +2191,13 @@
     if (spill.IsCpuRegister()) {
       pushq(spill.AsCpuRegister());
       gpr_count++;
-
-      // DW_CFA_advance_loc
-      DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-      cfi_pc_ = buffer_.Size();
-      // DW_CFA_def_cfa_offset
-      cfi_cfa_offset_ += kFramePointerSize;
-      DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-      // DW_CFA_offset reg offset
-      reg_offset++;
-      DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset);
     }
   }
-  // return address then method on stack
+  // return address then method on stack.
   int64_t rest_of_frame = static_cast<int64_t>(frame_size)
                           - (gpr_count * kFramePointerSize)
                           - kFramePointerSize /*return address*/;
   subq(CpuRegister(RSP), Immediate(rest_of_frame));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += rest_of_frame;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 
   // spill xmms
   int64_t offset = rest_of_frame;
@@ -2282,12 +2262,6 @@
 void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += adjust;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 }
 
 void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 39f781c..a786a6c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -244,7 +244,7 @@
 
 class X86_64Assembler FINAL : public Assembler {
  public:
-  X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
+  X86_64Assembler() {}
   virtual ~X86_64Assembler() {}
 
   /*
@@ -517,6 +517,7 @@
 
   X86_64Assembler* lock();
   void cmpxchgl(const Address& address, CpuRegister reg);
+  void cmpxchgq(const Address& address, CpuRegister reg);
 
   void mfence();
 
@@ -539,6 +540,10 @@
     lock()->cmpxchgl(address, reg);
   }
 
+  void LockCmpxchgq(const Address& address, CpuRegister reg) {
+    lock()->cmpxchgq(address, reg);
+  }
+
   //
   // Misc. functionality
   //
@@ -663,12 +668,6 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
-  void InitializeFrameDescriptionEntry() OVERRIDE;
-  void FinalizeFrameDescriptionEntry() OVERRIDE;
-  std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
-    return &cfi_info_;
-  }
-
  private:
   void EmitUint8(uint8_t value);
   void EmitInt32(int32_t value);
@@ -714,9 +713,6 @@
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
 
-  std::vector<uint8_t> cfi_info_;
-  uint32_t cfi_cfa_offset_, cfi_pc_;
-
   DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
 };
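
The header now exposes cmpxchgq together with the LockCmpxchgq convenience wrapper. A hypothetical caller sketch, not part of this change, showing how it might back a 64-bit compare-and-swap; EmitCas64, obj_reg and offset are illustrative names, and the register conventions are assumptions:

    // CMPXCHG implicitly compares RAX against the memory operand: on a match it stores
    // the source register and sets ZF, otherwise it loads the current value into RAX and
    // clears ZF.
    #include <cstdint>
    #include "utils/x86_64/assembler_x86_64.h"

    void EmitCas64(art::x86_64::X86_64Assembler* assembler,
                   art::x86_64::Register obj_reg, int32_t offset) {
      // Precondition (assumed): expected value already in RAX, new value in RSI.
      assembler->LockCmpxchgq(
          art::x86_64::Address(art::x86_64::CpuRegister(obj_reg), offset),
          art::x86_64::CpuRegister(art::x86_64::RSI));
      // A real caller would then branch or setcc on ZF to materialize the CAS result.
    }
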
 
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 4402dfc..a79bd09 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -539,6 +539,56 @@
   // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl");
 }
 
+TEST_F(AssemblerX86_64Test, LockCmpxchgl) {
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::R8));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+      x86_64::CpuRegister(x86_64::RSI));
+  const char* expected =
+    "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n"
+    "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchgl %ESI, (%R13)\n"
+    "lock cmpxchgl %ESI, (%R13,%R9,1)\n";
+
+  DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86_64Test, LockCmpxchgq) {
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::R8));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+      x86_64::CpuRegister(x86_64::RSI));
+  const char* expected =
+    "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n"
+    "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchg %RSI, (%R13)\n"
+    "lock cmpxchg %RSI, (%R13,%R9,1)\n";
+
+  DriverStr(expected, "lock_cmpxchg");
+}
+
 TEST_F(AssemblerX86_64Test, Movl) {
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
       x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index 3a96ad0..822659f 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -87,21 +87,6 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
-  int DWARFRegId() const {
-    CHECK(IsCpuRegister());
-    switch (id_) {
-      case RAX: return  0;
-      case RDX: return  1;
-      case RCX: return  2;
-      case RBX: return  3;
-      case RSI: return  4;
-      case RDI: return  5;
-      case RBP: return  6;
-      case RSP: return  7;
-      default: return static_cast<int>(id_);  // R8 ~ R15
-    }
-  }
-
   CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 6e7b04f..af00834 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -97,6 +97,7 @@
   kAllocTrackerLock,
   kDeoptimizationLock,
   kProfilerLock,
+  kJdwpShutdownLock,
   kJdwpEventListLock,
   kJdwpAttachLock,
   kJdwpStartLock,
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6759c4d..a909a1a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -307,7 +307,6 @@
 // Runtime JDWP state.
 static JDWP::JdwpState* gJdwpState = nullptr;
 static bool gDebuggerConnected;  // debugger or DDMS is connected.
-static bool gDisposed;           // debugger called VirtualMachine.Dispose, so we should drop the connection.
 
 static bool gDdmThreadNotification = false;
 
@@ -319,6 +318,7 @@
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
 bool Dbg::gDebuggerActive = false;
+bool Dbg::gDisposed = false;
 ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
@@ -551,7 +551,7 @@
     gJdwpState->PostVMDeath();
   }
   // Prevent the JDWP thread from processing JDWP incoming packets after we close the connection.
-  Disposed();
+  Dispose();
   delete gJdwpState;
   gJdwpState = nullptr;
   delete gRegistry;
@@ -599,14 +599,6 @@
   gDisposed = false;
 }
 
-void Dbg::Disposed() {
-  gDisposed = true;
-}
-
-bool Dbg::IsDisposed() {
-  return gDisposed;
-}
-
 bool Dbg::RequiresDeoptimization() {
   // We don't need deoptimization if everything runs with interpreter after
   // enabling -Xint mode.
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 5898784..dd7f9c5 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -239,7 +239,9 @@
   static void GoActive()
       LOCKS_EXCLUDED(Locks::breakpoint_lock_, Locks::deoptimization_lock_, Locks::mutator_lock_);
   static void Disconnected() LOCKS_EXCLUDED(Locks::deoptimization_lock_, Locks::mutator_lock_);
-  static void Disposed();
+  static void Dispose() {
+    gDisposed = true;
+  }
 
   // Returns true if we're actually debugging with a real debugger, false if it's
   // just DDMS (or nothing at all).
@@ -255,9 +257,12 @@
 
   // Returns true if a method has any breakpoints.
   static bool MethodHasAnyBreakpoints(mirror::ArtMethod* method)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::breakpoint_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::breakpoint_lock_);
 
-  static bool IsDisposed();
+  static bool IsDisposed() {
+    return gDisposed;
+  }
 
   /*
    * Time, in milliseconds, since the last debugger activity.  Does not
@@ -756,6 +761,10 @@
   // Indicates whether the debugger is making requests.
   static bool gDebuggerActive;
 
+  // Indicates whether we should drop the JDWP connection because the runtime is shutting down
+  // or because the debugger called VirtualMachine.Dispose.
+  static bool gDisposed;
+
   // The registry mapping objects to JDWP ids.
   static ObjectRegistry* gRegistry;
 
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index e16221c..31c9a0b 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -403,6 +403,14 @@
   // Used for VirtualMachine.Exit command handling.
   bool should_exit_;
   int exit_status_;
+
+  // Used to synchronize runtime shutdown with the JDWP command handler thread.
+  // When the runtime shuts down, it needs to stop the JDWP command handler thread by closing
+  // the JDWP connection. However, if the JDWP thread is processing a command, the runtime must
+  // wait for the command to finish so its reply can be sent before closing the connection.
+  Mutex shutdown_lock_ ACQUIRED_AFTER(event_list_lock_);
+  ConditionVariable shutdown_cond_ GUARDED_BY(shutdown_lock_);
+  bool processing_request_ GUARDED_BY(shutdown_lock_);
 };
 
 std::string DescribeField(const FieldId& field_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
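
The three new members implement the handshake the comment describes: the command handler marks a request as in flight, and shutdown waits until the flag clears before closing the connection. A simplified standalone analogue of the same protocol, using std::mutex and std::condition_variable in place of ART's Mutex and ConditionVariable (names are illustrative):

    #include <condition_variable>
    #include <mutex>

    struct ShutdownSync {
      std::mutex lock;                  // plays the role of shutdown_lock_
      std::condition_variable cond;     // plays the role of shutdown_cond_
      bool processing_request = false;  // plays the role of processing_request_

      // Command handler thread: bracket each request.
      void BeginRequest() {
        std::lock_guard<std::mutex> g(lock);
        processing_request = true;
      }
      void EndRequest() {
        std::lock_guard<std::mutex> g(lock);
        processing_request = false;
        cond.notify_all();  // wake a shutdown path blocked in WaitForIdle()
      }

      // Shutdown path: block until no request is in flight, then close the connection.
      void WaitForIdle() {
        std::unique_lock<std::mutex> g(lock);
        cond.wait(g, [this] { return !processing_request; });
      }
    };
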
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 0d161bc..d0ca214 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -271,7 +271,7 @@
 
 static JdwpError VM_Dispose(JdwpState*, Request*, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Dbg::Disposed();
+  Dbg::Dispose();
   return ERR_NONE;
 }
 
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index e2b88a5..5b30f0c 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -126,6 +126,7 @@
  */
 ssize_t JdwpNetStateBase::WritePacket(ExpandBuf* pReply, size_t length) {
   MutexLock mu(Thread::Current(), socket_lock_);
+  DCHECK(IsConnected()) << "Connection with the debugger is closed";
   DCHECK_LE(length, expandBufGetLength(pReply));
   return TEMP_FAILURE_RETRY(write(clientSock, expandBufGetBuffer(pReply), length));
 }
@@ -140,6 +141,7 @@
 
 ssize_t JdwpNetStateBase::WriteBufferedPacketLocked(const std::vector<iovec>& iov) {
   socket_lock_.AssertHeld(Thread::Current());
+  DCHECK(IsConnected()) << "Connection with the debugger is closed";
   return TEMP_FAILURE_RETRY(writev(clientSock, &iov[0], iov.size()));
 }
 
@@ -225,7 +227,10 @@
       jdwp_token_owner_thread_id_(0),
       ddm_is_active_(false),
       should_exit_(false),
-      exit_status_(0) {
+      exit_status_(0),
+      shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock),
+      shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_),
+      processing_request_(false) {
 }
 
 /*
@@ -338,10 +343,20 @@
 JdwpState::~JdwpState() {
   if (netState != nullptr) {
     /*
-     * Close down the network to inspire the thread to halt.
+     * Close down the network to inspire the thread to halt. If a request is being processed,
+     * we need to wait for it to finish first.
      */
-    VLOG(jdwp) << "JDWP shutting down net...";
-    netState->Shutdown();
+    {
+      Thread* self = Thread::Current();
+      MutexLock mu(self, shutdown_lock_);
+      while (processing_request_) {
+        VLOG(jdwp) << "JDWP command in progress: waiting for it to finish ...";
+        shutdown_cond_.Wait(self);
+      }
+
+      VLOG(jdwp) << "JDWP shutting down net...";
+      netState->Shutdown();
+    }
 
     if (debug_thread_started_) {
       run = false;
@@ -369,7 +384,13 @@
 
 // Returns "false" if we encounter a connection-fatal error.
 bool JdwpState::HandlePacket() {
-  JdwpNetStateBase* netStateBase = reinterpret_cast<JdwpNetStateBase*>(netState);
+  Thread* const self = Thread::Current();
+  {
+    MutexLock mu(self, shutdown_lock_);
+    processing_request_ = true;
+  }
+  JdwpNetStateBase* netStateBase = netState;
+  CHECK(netStateBase != nullptr) << "Connection has been closed";
   JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_);
 
   ExpandBuf* pReply = expandBufAlloc();
@@ -388,6 +409,11 @@
   }
   expandBufFree(pReply);
   netStateBase->ConsumeBytes(request.GetLength());
+  {
+    MutexLock mu(self, shutdown_lock_);
+    processing_request_ = false;
+    shutdown_cond_.Broadcast(self);
+  }
   return true;
 }
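
As a design note, the begin/end bookkeeping added to HandlePacket above could also be expressed as a scoped guard so the flag is cleared on every exit path. A hypothetical sketch; ScopedRequestInProgress is not part of this change and would need access to JdwpState's private members (for example as a friend or nested class):

    #include "jdwp/jdwp.h"

    namespace art {
    namespace JDWP {

    // Hypothetical RAII helper: marks a JDWP request as in flight for its lifetime and
    // wakes any shutdown waiter when it is destroyed. Sketch only; it assumes access to
    // the private shutdown_lock_/shutdown_cond_/processing_request_ members.
    class ScopedRequestInProgress {
     public:
      ScopedRequestInProgress(Thread* self, JdwpState* state) : self_(self), state_(state) {
        MutexLock mu(self_, state_->shutdown_lock_);
        state_->processing_request_ = true;
      }
      ~ScopedRequestInProgress() {
        MutexLock mu(self_, state_->shutdown_lock_);
        state_->processing_request_ = false;
        state_->shutdown_cond_.Broadcast(self_);
      }
     private:
      Thread* const self_;
      JdwpState* const state_;
    };

    }  // namespace JDWP
    }  // namespace art
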
 
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 3d0f074..708f61f 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -104,6 +104,16 @@
     if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
         System.out.println("Unexpectedly not succeeding compareAndSwap...");
     }
+
+    if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+        System.out.println("Unexpectedly succeeding compareAndSwapLong...");
+    }
+    if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+    }
+    if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+    }
   }
 
   private static class TestClass {