Merge "[MIPS64] Temporary placeholder build, to allow other projects to build"
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 18122b3..2bc36a5 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -210,6 +210,22 @@
   kOpInvalid,
 };
 
+enum MoveType {
+  kMov8GP,      // Move 8-bit general purpose register.
+  kMov16GP,     // Move 16-bit general purpose register.
+  kMov32GP,     // Move 32-bit general purpose register.
+  kMov64GP,     // Move 64-bit general purpose register.
+  kMov32FP,     // Move 32-bit FP register.
+  kMov64FP,     // Move 64-bit FP register.
+  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
+  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
+  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
+  kMov128FP = kMovU128FP,
+  kMovA128FP,   // Move 128-bit FP register to/from a region known to be 16-byte aligned.
+  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
+  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
+};
+
 std::ostream& operator<<(std::ostream& os, const OpKind& kind);
 
 enum ConditionCode {
diff --git a/compiler/dex/growable_array.h b/compiler/dex/growable_array.h
index b5842e1..6ed207c 100644
--- a/compiler/dex/growable_array.h
+++ b/compiler/dex/growable_array.h
@@ -40,6 +40,7 @@
   kGrowableArrayFillArrayData,
   kGrowableArraySuccessorBlocks,
   kGrowableArrayPredecessors,
+  kGrowableArraySlowPaths,
   kGNumListKinds
 };
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index debcb5c..f8706c4 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -647,7 +647,7 @@
   }
 
   RegLocation GetRegLocation(int index) {
-    DCHECK((index >= 0) && (index > num_ssa_regs_));
+    DCHECK((index >= 0) && (index < num_ssa_regs_));
     return reg_location_[index];
   }
 
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 32673db..0ed4576 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -154,6 +154,8 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 07fc6c7..9d3968b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -367,6 +367,16 @@
   return OpRegRegShift(op, r_dest_src1, r_src2, 0);
 }
 
+LIR* ArmMir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* ArmMir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
 LIR* ArmMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
   return NULL;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 072c6fa..5e0fed7 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1003,7 +1003,8 @@
       core_spill_mask_(0),
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
-      last_lir_insn_(NULL) {
+      last_lir_insn_(NULL),
+      slow_paths_(arena, 32, kGrowableArraySlowPaths) {
   // Reserve pointer id 0 for NULL.
   size_t null_idx = WrapPointer(NULL);
   DCHECK_EQ(null_idx, 0U);
@@ -1182,4 +1183,7 @@
   return branch;
 }
 
+void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
+  slow_paths_.Insert(slowpath);
+}
 }  // namespace art
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 760e06e..c59f3b8 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -21,6 +21,7 @@
 #include "mirror/array.h"
 #include "mirror/object-inl.h"
 #include "verifier/method_verifier.h"
+#include <functional>
 
 namespace art {
 
@@ -358,6 +359,34 @@
   }
 }
 
+//
+// Slow path to ensure a class is initialized for sget/sput.
+//
+class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
+ public:
+  StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont,
+           int storage_index, int r_base) :
+    LIRSlowPath(m2l, m2l->GetCurrentDexPc(), unresolved, cont), uninit_(uninit), storage_index_(storage_index),
+    r_base_(r_base) {
+  }
+
+  void Compile() {
+    LIR* unresolved_target = GenerateTargetLabel();
+    uninit_->target = unresolved_target;
+    m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage),
+                            storage_index_, true);
+    // Copy helper's result into r_base, a no-op on all but MIPS.
+    m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0));
+
+    m2l_->OpUnconditionalBranch(cont_);
+  }
+
+ private:
+  LIR* const uninit_;
+  const int storage_index_;
+  const int r_base_;
+};
+
 void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_double,
                       bool is_object) {
   int field_offset;
@@ -401,23 +430,20 @@
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!is_initialized) {
         // Check if r_base is NULL or a not yet initialized class.
-        // TUNING: fast path should fall through
+
+        // The slow path is invoked if r_base is NULL or the class it points
+        // to is not yet initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        LIR* initialized_branch = OpCmpMemImmBranch(kCondGe, r_tmp, r_base,
+        LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
                                           mirror::Class::kStatusInitialized, NULL);
+        LIR* cont = NewLIR0(kPseudoTargetLabel);
 
-        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
-        unresolved_branch->target = unresolved_target;
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
-                             true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        OpRegCopy(r_base, TargetReg(kRet0));
-
-        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
-        initialized_branch->target = initialized_target;
+        AddSlowPath(new (arena_) StaticFieldSlowPath(this,
+                                                     unresolved_branch, uninit_branch, cont,
+                                                     storage_index, r_base));
 
         FreeTemp(r_tmp);
       }
@@ -494,23 +520,20 @@
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!is_initialized) {
         // Check if r_base is NULL or a not yet initialized class.
-        // TUNING: fast path should fall through
+
+        // The slow path is invoked if r_base is NULL or the class it points
+        // to is not yet initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        LIR* initialized_branch = OpCmpMemImmBranch(kCondGe, r_tmp, r_base,
+        LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
                                           mirror::Class::kStatusInitialized, NULL);
+        LIR* cont = NewLIR0(kPseudoTargetLabel);
 
-        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
-        unresolved_branch->target = unresolved_target;
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
-                             true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        OpRegCopy(r_base, TargetReg(kRet0));
-
-        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
-        initialized_branch->target = initialized_target;
+        AddSlowPath(new (arena_) StaticFieldSlowPath(this,
+                                                     unresolved_branch, uninit_branch, cont,
+                                                     storage_index, r_base));
 
         FreeTemp(r_tmp);
       }
@@ -550,6 +573,16 @@
   }
 }
 
+// Generate code for all slow paths.
+void Mir2Lir::HandleSlowPaths() {
+  int n = slow_paths_.Size();
+  for (int i = 0; i < n; ++i) {
+    LIRSlowPath* slowpath = slow_paths_.Get(i);
+    slowpath->Compile();
+  }
+  slow_paths_.Reset();
+}
+
 void Mir2Lir::HandleSuspendLaunchPads() {
   int num_elems = suspend_launchpads_.Size();
   ThreadOffset helper_offset = QUICK_ENTRYPOINT_OFFSET(pTestSuspend);
@@ -818,32 +851,40 @@
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
       FlushAllRegs();
-      LIR* branch1 = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL);
-      // Resolved, store and hop over following code
+      LIR* branch = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+      // Object to generate the slow path for class resolution.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, const int type_idx,
+                 const RegLocation& rl_method, const RegLocation& rl_result) :
+                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), type_idx_(type_idx),
+                   rl_method_(rl_method), rl_result_(rl_result) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx_,
+                                        rl_method_.low_reg, true);
+          m2l_->OpRegCopy(rl_result_.low_reg,  m2l_->TargetReg(kRet0));
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+        const int type_idx_;
+        const RegLocation rl_method_;
+        const RegLocation rl_result_;
+      };
+
+      // Add to list for later compilation by HandleSlowPaths().
+      AddSlowPath(new (arena_) SlowPath(this, branch, cont,
+                                        type_idx, rl_method, rl_result));
+
       StoreValue(rl_dest, rl_result);
-      /*
-       * Because we have stores of the target value on two paths,
-       * clobber temp tracking for the destination using the ssa name
-       */
-      ClobberSReg(rl_dest.s_reg_low);
-      LIR* branch2 = OpUnconditionalBranch(0);
-      // TUNING: move slow path to end & remove unconditional branch
-      LIR* target1 = NewLIR0(kPseudoTargetLabel);
-      // Call out to helper, which will return resolved type in kArg0
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx,
-                              rl_method.low_reg, true);
-      RegLocation rl_result = GetReturn(false);
-      StoreValue(rl_dest, rl_result);
-      /*
-       * Because we have stores of the target value on two paths,
-       * clobber temp tracking for the destination using the ssa name
-       */
-      ClobberSReg(rl_dest.s_reg_low);
-      // Rejoin code paths
-      LIR* target2 = NewLIR0(kPseudoTargetLabel);
-      branch1->target = target1;
-      branch2->target = target2;
-    } else {
+    } else {
       // Fast path, we're done - just store result
       StoreValue(rl_dest, rl_result);
     }
@@ -875,32 +916,41 @@
                  TargetReg(kArg0));
 
     // Might call out to helper, which will return resolved string in kRet0
-    int r_tgt = CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
     LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
-    if (cu_->instruction_set == kThumb2) {
+    if (cu_->instruction_set == kThumb2 ||
+        cu_->instruction_set == kMips) {
+      //  OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
       LoadConstant(TargetReg(kArg1), string_idx);
-      OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
+      LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
       GenBarrier();
-      // For testing, always force through helper
-      if (!EXERCISE_SLOWEST_STRING_PATH) {
-        OpIT(kCondEq, "T");
-      }
-      // The copy MUST generate exactly one instruction (for OpIT).
-      DCHECK_NE(TargetReg(kArg0), r_method);
-      OpRegCopy(TargetReg(kArg0), r_method);   // .eq
 
-      LIR* call_inst = OpReg(kOpBlx, r_tgt);    // .eq, helper(Method*, string_idx)
-      MarkSafepointPC(call_inst);
-      FreeTemp(r_tgt);
-    } else if (cu_->instruction_set == kMips) {
-      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
-      LoadConstant(TargetReg(kArg1), string_idx);
-      OpRegCopy(TargetReg(kArg0), r_method);   // .eq
-      LIR* call_inst = OpReg(kOpBlx, r_tgt);
-      MarkSafepointPC(call_inst);
-      FreeTemp(r_tgt);
-      LIR* target = NewLIR0(kPseudoTargetLabel);
-      branch->target = target;
+      // Object to generate the slow path for string resolution.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, int r_method) :
+          LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), r_method_(r_method) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          int r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
+
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+          LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
+          m2l_->MarkSafepointPC(call_inst);
+          m2l_->FreeTemp(r_tgt);
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+        int r_method_;
+      };
+
+      // Add to list for later compilation by HandleSlowPaths().
+      AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method));
     } else {
       DCHECK_EQ(cu_->instruction_set, kX86);
       LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
@@ -1213,37 +1263,90 @@
     LoadWordDisp(class_reg, offset_of_type, class_reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
-      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
-      // Not resolved
-      // Call out to helper, which will return resolved type in kArg0
-      // InitializeTypeFromCode(idx, method)
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx,
-                              TargetReg(kArg1), true);
-      OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
-      // Rejoin code paths
-      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
-      hop_branch->target = hop_target;
+      LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+      // Slow path to initialize the type.  Executed if the type is NULL.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, const int type_idx,
+                 const int class_reg) :
+                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), type_idx_(type_idx),
+                   class_reg_(class_reg) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          // Call out to helper, which will return resolved type in kArg0
+          // InitializeTypeFromCode(idx, method)
+          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx_,
+                                        m2l_->TargetReg(kArg1), true);
+          m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0));  // Align usage with fast path
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+       private:
+        const int type_idx_;
+        const int class_reg_;
+      };
+
+      AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont,
+                                        type_idx, class_reg));
     }
   }
   // At this point, class_reg (kArg2) has class
   LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-  /* Null is OK - continue */
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
-  /* load object->klass_ */
-  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
-  /* kArg1 now contains object->klass_ */
-  LIR* branch2 = NULL;
-  if (!type_known_abstract) {
-    branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL);
-  }
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2),
-                          TargetReg(kArg1), true);
-  /* branch target here */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch1->target = target;
-  if (branch2 != NULL) {
-    branch2->target = target;
+
+  // Slow path for the case where the classes are not equal.  In this case we need
+  // to call a helper function to do the check.
+  class SlowPath : public LIRSlowPath {
+   public:
+    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load):
+               LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), load_(load) {
+    }
+
+    void Compile() {
+      GenerateTargetLabel();
+
+      if (load_) {
+        m2l_->LoadWordDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                           m2l_->TargetReg(kArg1));
+      }
+      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), m2l_->TargetReg(kArg2),
+                                    m2l_->TargetReg(kArg1), true);
+
+      m2l_->OpUnconditionalBranch(cont_);
+    }
+
+   private:
+    bool load_;
+  };
+
+  if (type_known_abstract) {
+    // Easier case, run slow path if target is non-null (slow path will load from target)
+    LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0), 0, NULL);
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+    AddSlowPath(new (arena_) SlowPath(this, branch, cont, true));
+  } else {
+    // Harder, more common case.  We need to generate a forward branch over the load
+    // if the target is null.  If it's non-null we perform the load and branch to the
+    // slow path if the classes are not equal.
+
+    /* Null is OK - continue */
+    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+    /* load object->klass_ */
+    DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
+    LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                    TargetReg(kArg1));
+
+    LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL);
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+    // Add the slow path, which will not perform the load since it has already been done.
+    AddSlowPath(new (arena_) SlowPath(this, branch2, cont, false));
+
+    // Set the null check to branch to the continuation.
+    branch1->target = cont;
   }
 }
 
@@ -1323,6 +1426,7 @@
 
 void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
+  DCHECK_NE(cu_->instruction_set, kX86);
   OpKind op = kOpBkpt;
   bool is_div_rem = false;
   bool check_zero = false;
@@ -1401,15 +1505,9 @@
     } else {
       if (shift_op) {
         int t_reg = INVALID_REG;
-        if (cu_->instruction_set == kX86) {
-          // X86 doesn't require masking and must use ECX
-          t_reg = TargetReg(kCount);  // rCX
-          LoadValueDirectFixed(rl_src2, t_reg);
-        } else {
-          rl_src2 = LoadValue(rl_src2, kCoreReg);
-          t_reg = AllocTemp();
-          OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31);
-        }
+        rl_src2 = LoadValue(rl_src2, kCoreReg);
+        t_reg = AllocTemp();
+        OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31);
         rl_src1 = LoadValue(rl_src1, kCoreReg);
         rl_result = EvalLoc(rl_dest, kCoreReg, true);
         OpRegRegReg(op, rl_result.low_reg, rl_src1.low_reg, t_reg);
@@ -1432,9 +1530,6 @@
       }
       rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
       done = true;
-    } else if (cu_->instruction_set == kX86) {
-      rl_result = GenDivRem(rl_dest, rl_src1, rl_src2, op == kOpDiv, check_zero);
-      done = true;
     } else if (cu_->instruction_set == kThumb2) {
       if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
         // Use ARM SDIV instruction for division.  For remainder we also need to
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 3823fb3..6382dd6 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -811,42 +811,145 @@
     }
   }
 
+  // Logic below assumes that Method pointer is at offset zero from SP.
+  DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+  // The first 3 arguments are passed via registers.
+  // TODO: For 64-bit, instead of hardcoding 4 for Method* size, we should either
+  // get size of uintptr_t or size of object reference according to model being used.
+  int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t));
   int start_offset = SRegOffset(info->args[3].s_reg_low);
-  int outs_offset = 4 /* Method* */ + (3 * 4);
-  if (cu_->instruction_set != kThumb2) {
+  int regs_left_to_pass_via_stack = info->num_arg_words - 3;
+  DCHECK_GT(regs_left_to_pass_via_stack, 0);
+
+  if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) {
+    // Use vldm/vstm pair using kArg3 as a temp
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
+    LIR* ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    // TUNING: loosen barrier
+    ld->u.m.def_mask = ENCODE_ALL;
+    SetMemRefType(ld, true /* is_load */, kDalvikReg);
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    LIR* st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    SetMemRefType(st, false /* is_load */, kDalvikReg);
+    st->u.m.def_mask = ENCODE_ALL;
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+  } else if (cu_->instruction_set == kX86) {
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    while (regs_left_to_pass_via_stack > 0) {
+      // This is based on the knowledge that the stack itself is 16-byte aligned.
+      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+      size_t bytes_to_move;
+
+      /*
+       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
+       * 128-bit move because we won't get another chance to try to align. If there are more than
+       * 4 registers left to move, consider a 128-bit move only if either src or dest is aligned.
+       * We do this because we could potentially do a smaller move to align.
+       */
+      if (regs_left_to_pass_via_stack == 4 ||
+          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+        // Moving 128-bits via xmm register.
+        bytes_to_move = sizeof(uint32_t) * 4;
+
+        // Allocate a free xmm temp. Since we are working through the calling sequence,
+        // we expect to have an xmm temporary available.
+        int temp = AllocTempDouble();
+        CHECK_GT(temp, 0);
+
+        LIR* ld1 = nullptr;
+        LIR* ld2 = nullptr;
+        LIR* st1 = nullptr;
+        LIR* st2 = nullptr;
+
+        /*
+         * The logic is similar for both loads and stores. If we have 16-byte alignment,
+         * do an aligned move. If we have 8-byte alignment, then do the move in two
+         * parts. This approach prevents possible cache line splits. Finally, fall back
+         * to doing an unaligned move. In most cases we likely won't split the cache
+         * line but we cannot prove it and thus take a conservative approach.
+         */
+        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+        if (src_is_16b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+        } else if (src_is_8b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1), kMovHi128FP);
+        } else {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+        }
+
+        if (dest_is_16b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+        } else if (dest_is_8b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1), temp, kMovHi128FP);
+        } else {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+        }
+
+        // TODO If we could keep track of aliasing information for memory accesses that are wider
+        // than 64-bit, we wouldn't need to set up a barrier.
+        if (ld1 != nullptr) {
+          if (ld2 != nullptr) {
+            // For 64-bit load we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+          } else {
+            // Set barrier for 128-bit load.
+            SetMemRefType(ld1, true /* is_load */, kDalvikReg);
+            ld1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+        if (st1 != nullptr) {
+          if (st2 != nullptr) {
+            // For 64-bit store we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+          } else {
+            // Set barrier for 128-bit store.
+            SetMemRefType(st1, false /* is_load */, kDalvikReg);
+            st1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+
+        // Free the temporary used for the data movement.
+        FreeTemp(temp);
+      } else {
+        // Moving 32-bits via general purpose register.
+        bytes_to_move = sizeof(uint32_t);
+
+        // Instead of allocating a new temp, simply reuse one of the registers being used
+        // for argument passing.
+        int temp = TargetReg(kArg3);
+
+        // Now load the argument VR and store to the outs.
+        LoadWordDisp(TargetReg(kSp), current_src_offset, temp);
+        StoreWordDisp(TargetReg(kSp), current_dest_offset, temp);
+      }
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+  } else {
     // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
     CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0),
                                TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
-  } else {
-    if (info->num_arg_words >= 20) {
-      // Generate memcpy
-      OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
-      OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0),
-                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
-    } else {
-      // Use vldm/vstm pair using kArg3 as a temp
-      int regs_left = std::min(info->num_arg_words - 3, 16);
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
-      LIR* ld = OpVldm(TargetReg(kArg3), regs_left);
-      // TUNING: loosen barrier
-      ld->u.m.def_mask = ENCODE_ALL;
-      SetMemRefType(ld, true /* is_load */, kDalvikReg);
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      LIR* st = OpVstm(TargetReg(kArg3), regs_left);
-      SetMemRefType(st, false /* is_load */, kDalvikReg);
-      st->u.m.def_mask = ENCODE_ALL;
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
   }
 
   call_state = LoadArgRegs(info, call_state, next_call_insn,
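The width-selection rule in the x86 stack-copy loop above can be summarized by the following standalone sketch (illustrative only; the helper name BytesForNextMove is hypothetical and the real code inlines this decision in the loop):

#include <cstddef>
#include <cstdint>

// Sketch of the move-width choice used when marshalling outgoing arguments on x86:
// prefer a 128-bit xmm move when exactly 4 words remain, or when more than 4 remain
// and at least one side is 16-byte aligned; otherwise fall back to a 32-bit GPR move.
static size_t BytesForNextMove(int regs_left, int src_offset, int dest_offset) {
  bool src_is_16b_aligned = (src_offset & 0xF) == 0;
  bool dest_is_16b_aligned = (dest_offset & 0xF) == 0;
  if (regs_left == 4 ||
      (regs_left > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
    return sizeof(uint32_t) * 4;  // One 128-bit move (aligned, split, or unaligned).
  }
  return sizeof(uint32_t);        // One 32-bit move via a general purpose register.
}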
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 65582dd..f7c2821 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -294,6 +294,38 @@
   }
 }
 
+void Mir2Lir::StoreFinalValue(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(rl_src.location, kLocPhysReg);
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopy(rl_dest.low_reg, rl_src.low_reg);
+  } else {
+    // Just re-assign the register.  Dest gets Src's reg.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to home location)
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+  MarkDirty(rl_dest);
+
+  ResetDefLoc(rl_dest);
+  if (IsDirty(rl_dest.low_reg) &&
+      oat_live_out(rl_dest.s_reg_low)) {
+    LIR *def_start = last_lir_insn_;
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                  rl_dest.low_reg, kWord);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    if (!rl_dest.ref) {
+      // Exclude references from store elimination
+      MarkDef(rl_dest, def_start, def_end);
+    }
+  }
+}
+
 void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
   DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg));
   DCHECK(rl_dest.wide);
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index aca93f5..11b8f83 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -151,6 +151,8 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index c5e2b36..21c971c 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -325,6 +325,16 @@
   return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* MipsMir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* MipsMir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
 LIR* MipsMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for MIPS";
   return NULL;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f4122d..ae54fb8 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -839,6 +839,8 @@
       next_bb = iter.Next();
     } while ((next_bb != NULL) && (next_bb->block_type == kDead));
   }
+  HandleSlowPaths();
+
   cu_->NewTimingSplit("Launchpads");
   HandleSuspendLaunchPads();
 
@@ -847,4 +849,15 @@
   HandleIntrinsicLaunchPads();
 }
 
+//
+// LIR Slow Path
+//
+
+LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel() {
+  LIR* target = m2l_->RawLIR(current_dex_pc_, kPseudoTargetLabel);
+  m2l_->AppendLIR(target);
+  fromfast_->target = target;
+  m2l_->SetCurrentDexPc(current_dex_pc_);
+  return target;
+}
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 81053a3..3a68044 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -258,6 +258,63 @@
       bool first_in_pair;
     };
 
+    //
+    // Slow paths.  This object is used to generate a sequence of code that is executed on the
+    // slow path.  For example, resolving a string or class is slow and only needs to be executed
+    // once (after that it is resolved and doesn't need to be done again).  We want slow paths
+    // to be placed out-of-line, and not require a (mispredicted, probably) conditional forward
+    // branch over them.
+    //
+    // If you want to create a slow path, declare a class derived from LIRSlowPath and provide
+    // the Compile() function that will be called near the end of the code generated by the
+    // method.
+    //
+    // The basic flow for a slow path is:
+    //
+    //     CMP reg, #value
+    //     BEQ fromfast
+    //   cont:
+    //     ...
+    //     fast path code
+    //     ...
+    //     more code
+    //     ...
+    //     RETURN
+    //
+    //   fromfast:
+    //     ...
+    //     slow path code
+    //     ...
+    //     B cont
+    //
+    // So you see we need two labels and two branches.  The first branch (called fromfast) is
+    // the conditional branch to the slow path code.  The second label (called cont) is used
+    // as an unconditional branch target for getting back to the code after the slow path
+    // has completed.
+    //
+
+    class LIRSlowPath {
+     public:
+      LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
+                  LIR* cont = nullptr) :
+        m2l_(m2l), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+      }
+      virtual ~LIRSlowPath() {}
+      virtual void Compile() = 0;
+
+      static void* operator new(size_t size, ArenaAllocator* arena) {
+        return arena->Alloc(size, ArenaAllocator::kAllocData);
+      }
+
+     protected:
+      LIR* GenerateTargetLabel();
+
+      Mir2Lir* const m2l_;
+      const DexOffset current_dex_pc_;
+      LIR* const fromfast_;
+      LIR* const cont_;
+    };
+
     virtual ~Mir2Lir() {}
 
     int32_t s4FromSwitchData(const void* switch_data) {
@@ -323,6 +380,10 @@
      */
     size_t GetNumBytesForCompilerTempSpillRegion();
 
+    DexOffset GetCurrentDexPc() const {
+      return current_dalvik_offset_;
+    }
+
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -470,6 +531,7 @@
     void HandleSuspendLaunchPads();
     void HandleIntrinsicLaunchPads();
     void HandleThrowLaunchPads();
+    void HandleSlowPaths();
     void GenBarrier();
     LIR* GenCheck(ConditionCode c_code, ThrowKind kind);
     LIR* GenImmedCheck(ConditionCode c_code, int reg, int imm_val,
@@ -510,8 +572,6 @@
                       RegLocation rl_src1, RegLocation rl_src2);
     void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift);
-    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                       RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src, int lit);
     void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
@@ -520,8 +580,11 @@
                            RegLocation rl_src);
     void GenSuspendTest(int opt_flags);
     void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
+    virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                       RegLocation rl_src1, RegLocation rl_src2);
 
     // Shared by all targets - implemented in gen_invoke.cc.
     int CallHelperSetup(ThreadOffset helper_offset);
@@ -644,6 +707,18 @@
     void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
     /**
+     * @brief Used to do the final store to a destination as per bytecode semantics.
+     * @see StoreValue
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location.  It must be kLocPhysReg.
+     *
+     * This is used for x86 two-operand computations, where we have already computed the
+     * correct value in the result register and it now only needs to be recorded as the
+     * destination.  This avoids the extra register copy that StoreValue would generate.
+     */
+    void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
+
+    /**
      * @brief Used to do the final store in a wide destination as per bytecode semantics.
      * @see StoreValueWide
      * @param rl_dest The destination dalvik register location.
@@ -871,6 +946,27 @@
     virtual LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2) = 0;
 
     /**
+     * @brief Used to generate an LIR that does a load from mem to reg.
+     * @param r_dest The destination physical register.
+     * @param r_base The base physical register for memory operand.
+     * @param offset The displacement for memory operand.
+     * @param move_type Specification on the move desired (size, alignment, register kind).
+     * @return Returns the generated move LIR.
+     */
+    virtual LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) = 0;
+
+    /**
+     * @brief Used to generate an LIR that does a store from reg to mem.
+     * @param r_base The base physical register for memory operand.
+     * @param offset The displacement for memory operand.
+     * @param r_src The source physical register.
+     * @param move_type Specification on the move desired (size, alignment, register kind).
+     * @return Returns the generated move LIR.
+     */
+    virtual LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) = 0;
+
+    /**
      * @brief Used for generating a conditional register to register operation.
      * @param op The opcode kind.
      * @param cc The condition code that when true will perform the opcode.
@@ -948,6 +1044,8 @@
     virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src);
 
+    void AddSlowPath(LIRSlowPath* slowpath);
+
   private:
     void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                     bool type_known_abstract, bool use_declaring_class,
@@ -961,6 +1059,11 @@
       p->def_end = NULL;
     }
 
+    void SetCurrentDexPc(DexOffset dexpc) {
+      current_dalvik_offset_ = dexpc;
+    }
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
@@ -1016,6 +1119,8 @@
     unsigned int fp_spill_mask_;
     LIR* first_lir_insn_;
     LIR* last_lir_insn_;
+
+    GrowableArray<LIRSlowPath*> slow_paths_;
 };  // Class Mir2Lir
 
 }  // namespace art
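A minimal sketch of how a slow path is written against the interface above, following the pattern StaticFieldSlowPath uses in gen_common.cc (ExampleSlowPath and the elided helper call are hypothetical; LIRSlowPath, GenerateTargetLabel, AddSlowPath and HandleSlowPaths are the hooks added by this change):

// Sketch only: a slow path that does some out-of-line work when a guard fails.
class ExampleSlowPath : public Mir2Lir::LIRSlowPath {
 public:
  ExampleSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont)
      : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont) {}

  void Compile() {
    GenerateTargetLabel();               // Bind the fromfast branch to this point.
    // ... emit the out-of-line work here, e.g. a m2l_->CallRuntimeHelper...() call ...
    m2l_->OpUnconditionalBranch(cont_);  // Rejoin the fast path.
  }
};

// At the guard in the fast path:
//   LIR* fromfast = OpCmpImmBranch(kCondEq, reg, 0, NULL);  // Branch taken => slow path.
//   LIR* cont = NewLIR0(kPseudoTargetLabel);                // Rejoin label.
//   AddSlowPath(new (arena_) ExampleSlowPath(this, fromfast, cont));
// HandleSlowPaths() later compiles every registered slow path after the method body.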
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index c29d6c4..ae53ddb 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -297,6 +297,24 @@
   { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
   { kX86FstpdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
+  EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
+  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
+  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  EXT_0F_ENCODING_MAP(Movaps,    0x0, 0x28, REG_DEF0),
+  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" },
+  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" },
+  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" },
+  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
   { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE01,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
   { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
@@ -863,6 +881,20 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
+                                int displacement, uint8_t cl) {
+  DCHECK_EQ(cl, static_cast<uint8_t>(rCX));
+  EmitPrefix(entry);
+  code_buffer_.push_back(entry->skeleton.opcode);
+  DCHECK_NE(0x0F, entry->skeleton.opcode);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  DCHECK_LT(base, 8);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1140,6 +1172,17 @@
           break;
         }
         default:
+          if (lir->flags.fixup == kFixupLoad) {
+            LIR *target_lir = lir->target;
+            DCHECK(target_lir != NULL);
+            CodeOffset target = target_lir->offset;
+            lir->operands[2] = target;
+            int newSize = GetInsnSize(lir);
+            if (newSize != lir->flags.size) {
+              lir->flags.size = newSize;
+              res = kRetryAll;
+            }
+          }
           break;
       }
     }
@@ -1230,6 +1273,9 @@
       case kShiftRegCl:  // lir operands - 0: reg, 1: cl
         EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kShiftMemCl:  // lir operands - 0: base, 1: displacement, 2: cl
+        EmitShiftMemCl(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegCond:  // lir operands - 0: reg, 1: condition
         EmitRegCond(entry, lir->operands[0], lir->operands[1]);
         break;
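The X86Mir2Lir bodies of OpMovRegMem/OpMovMemReg are not part of this excerpt; a plausible sketch of how the load side might dispatch the 128-bit MoveType variants onto the encodings added above (the exact mapping and the use of NewLIR3 are assumptions, not confirmed by this diff):

// Sketch (assumption): dispatching MoveType onto the new SSE load encodings.
LIR* X86Mir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
  X86OpCode opcode = kX86Bkpt;
  switch (move_type) {
    case kMovA128FP:  opcode = kX86MovapsRM; break;  // 16-byte aligned 128-bit load.
    case kMovU128FP:  opcode = kX86MovupsRM; break;  // Possibly unaligned 128-bit load.
    case kMovLo128FP: opcode = kX86MovlpsRM; break;  // Low 64 bits of the xmm register.
    case kMovHi128FP: opcode = kX86MovhpsRM; break;  // High 64 bits of the xmm register.
    default:
      LOG(FATAL) << "Unsupported move type";
      break;
  }
  return NewLIR3(opcode, r_dest, r_base, offset);
}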
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 4267b5b..93875c9 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -84,10 +84,19 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  int start_of_method_reg = AllocTemp();
-  // Materialize a pointer to the switch table
   // NewLIR0(kX86Bkpt);
-  NewLIR1(kX86StartOfMethod, start_of_method_reg);
+
+  // Materialize a pointer to the switch table
+  int start_of_method_reg;
+  if (base_of_code_ != nullptr) {
+    // We can use the saved value.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    rl_method = LoadValue(rl_method, kCoreReg);
+    start_of_method_reg = rl_method.low_reg;
+  } else {
+    start_of_method_reg = AllocTemp();
+    NewLIR1(kX86StartOfMethod, start_of_method_reg);
+  }
   int low_key = s4FromSwitchData(&table[2]);
   int keyReg;
   // Remove the bias, if necessary
@@ -142,7 +151,13 @@
   FlushAllRegs();   /* Everything to home location */
   LoadValueDirectFixed(rl_src, rX86_ARG0);
   // Materialize a pointer to the fill data image
-  NewLIR1(kX86StartOfMethod, rX86_ARG2);
+  if (base_of_code_ != nullptr) {
+    // We can use the saved value.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    LoadValueDirect(rl_method, rX86_ARG2);
+  } else {
+    NewLIR1(kX86StartOfMethod, rX86_ARG2);
+  }
   NewLIR2(kX86PcRelAdr, rX86_ARG1, WrapPointer(tab_rec));
   NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2);
   CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData), rX86_ARG0,
@@ -211,6 +226,13 @@
 
   FlushIns(ArgLocs, rl_method);
 
+  if (base_of_code_ != nullptr) {
+    // We have been asked to save the address of the method start for later use.
+    NewLIR1(kX86StartOfMethod, rX86_ARG0);
+    int displacement = SRegOffset(base_of_code_->s_reg_low);
+    StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
+  }
+
   FreeTemp(rX86_ARG0);
   FreeTemp(rX86_ARG1);
   FreeTemp(rX86_ARG2);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index f054d82..4c1c171 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -204,7 +204,11 @@
     LIR* OpRegCopyNoInsert(int r_dest, int r_src);
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
+    LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
+    LIR* OpRegMem(OpKind op, int r_dest, RegLocation value);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
@@ -231,6 +235,16 @@
     int AllocTempDouble();
     void ResetDefLocWide(RegLocation rl);
 
+    /*
+     * @brief x86 specific codegen for int operations.
+     * @param opcode Operation to perform.
+     * @param rl_dest Destination for the result.
+     * @param rl_lhs Left hand operand.
+     * @param rl_rhs Right hand operand.
+     */
+    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                       RegLocation rl_lhs, RegLocation rl_rhs);
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -260,6 +274,7 @@
     void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
     void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
+    void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
     void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
     void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
 
@@ -366,6 +381,7 @@
      * @param val Constant multiplier.
      */
     void GenImulRegImm(int dest, int src, int val);
+
     /*
      * Generate an imul of a memory location by a constant or a better sequence.
      * @param dest Destination Register.
@@ -386,6 +402,68 @@
      */
     LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
                            int offset, int check_value, LIR* target);
+    /*
+     * Can this operation use core registers without temporaries?
+     * @param rl_lhs Left hand operand.
+     * @param rl_rhs Right hand operand.
+     * @returns 'true' if the operation can proceed without needing temporary regs.
+     */
+    bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
+
+    /*
+     * @brief Perform MIR analysis before compiling method.
+     * @note Invokes Mir2Lir::Materialize after analysis.
+     */
+    void Materialize();
+
+    /*
+     * @brief Analyze MIR before generating code, to prepare for the code generation.
+     */
+    void AnalyzeMIR();
+
+    /*
+     * @brief Analyze one basic block.
+     * @param bb Basic block to analyze.
+     */
+    void AnalyzeBB(BasicBlock * bb);
+
+    /*
+     * @brief Analyze one extended MIR instruction
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Extended instruction to analyze.
+     */
+    void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir);
+
+    /*
+     * @brief Analyze one MIR instruction
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Instruction to analyze.
+     */
+    void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
+
+    /*
+     * @brief Analyze one MIR float/double instruction
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Instruction to analyze.
+     */
+    void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
+
+    /*
+     * @brief Analyze one use of a double operand.
+     * @param rl_use Double RegLocation for the operand.
+     */
+    void AnalyzeDoubleUse(RegLocation rl_use);
+
+    // Information derived from analysis of MIR
+
+    // Have we decided to compute a ptr to code and store in temporary VR?
+    bool store_method_addr_;
+
+    // The compiler temporary for the code address of the method.
+    CompilerTemp *base_of_code_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index e458e5f..a567a8a0 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1717,4 +1717,234 @@
   StoreValue(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_lhs, RegLocation rl_rhs) {
+  OpKind op = kOpBkpt;
+  bool is_div_rem = false;
+  bool unary = false;
+  bool shift_op = false;
+  bool is_two_addr = false;
+  RegLocation rl_result;
+  switch (opcode) {
+    case Instruction::NEG_INT:
+      op = kOpNeg;
+      unary = true;
+      break;
+    case Instruction::NOT_INT:
+      op = kOpMvn;
+      unary = true;
+      break;
+    case Instruction::ADD_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::ADD_INT:
+      op = kOpAdd;
+      break;
+    case Instruction::SUB_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SUB_INT:
+      op = kOpSub;
+      break;
+    case Instruction::MUL_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::MUL_INT:
+      op = kOpMul;
+      break;
+    case Instruction::DIV_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::DIV_INT:
+      op = kOpDiv;
+      is_div_rem = true;
+      break;
+    /* NOTE: returns in kArg1 */
+    case Instruction::REM_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::REM_INT:
+      op = kOpRem;
+      is_div_rem = true;
+      break;
+    case Instruction::AND_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::AND_INT:
+      op = kOpAnd;
+      break;
+    case Instruction::OR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::OR_INT:
+      op = kOpOr;
+      break;
+    case Instruction::XOR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::XOR_INT:
+      op = kOpXor;
+      break;
+    case Instruction::SHL_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHL_INT:
+      shift_op = true;
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHR_INT:
+      shift_op = true;
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::USHR_INT:
+      shift_op = true;
+      op = kOpLsr;
+      break;
+    default:
+      LOG(FATAL) << "Invalid word arith op: " << opcode;
+  }
+
+  // Can we convert to a two address instruction?
+  if (!is_two_addr &&
+      (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
+       mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
+    is_two_addr = true;
+  }
+
+  // Get the div/rem stuff out of the way.
+  if (is_div_rem) {
+    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
+    StoreValue(rl_dest, rl_result);
+    return;
+  }
+
+  if (unary) {
+    rl_lhs = LoadValue(rl_lhs, kCoreReg);
+    rl_result = UpdateLoc(rl_dest);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegReg(op, rl_result.low_reg, rl_lhs.low_reg);
+  } else {
+    if (shift_op) {
+      // X86 doesn't require masking and must use ECX.
+      int t_reg = TargetReg(kCount);  // rCX
+      LoadValueDirectFixed(rl_rhs, t_reg);
+      if (is_two_addr) {
+        // Can we do this directly into memory?
+        rl_result = UpdateLoc(rl_dest);
+        rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        if (rl_result.location != kLocPhysReg) {
+          // Okay, we can do this into memory
+          OpMemReg(op, rl_result, t_reg);
+          FreeTemp(t_reg);
+          return;
+        } else if (!IsFpReg(rl_result.low_reg)) {
+          // Can do this directly into the result register
+          OpRegReg(op, rl_result.low_reg, t_reg);
+          FreeTemp(t_reg);
+          StoreFinalValue(rl_dest, rl_result);
+          return;
+        }
+      }
+      // Three address form, or we can't do directly.
+      rl_lhs = LoadValue(rl_lhs, kCoreReg);
+      rl_result = EvalLoc(rl_dest, kCoreReg, true);
+      OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, t_reg);
+      FreeTemp(t_reg);
+    } else {
+      // Multiply is 3 operand only (sort of).
+      if (is_two_addr && op != kOpMul) {
+        // Can we do this directly into memory?
+        rl_result = UpdateLoc(rl_dest);
+        if (rl_result.location == kLocPhysReg) {
+          // Can we do this from memory directly?
+          rl_rhs = UpdateLoc(rl_rhs);
+          if (rl_rhs.location != kLocPhysReg) {
+            OpRegMem(op, rl_result.low_reg, rl_rhs);
+            StoreFinalValue(rl_dest, rl_result);
+            return;
+          } else if (!IsFpReg(rl_rhs.low_reg)) {
+            OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg);
+            StoreFinalValue(rl_dest, rl_result);
+            return;
+          }
+        }
+        rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        if (rl_result.location != kLocPhysReg) {
+          // Okay, we can do this into memory.
+          OpMemReg(op, rl_result, rl_rhs.low_reg);
+          return;
+        } else if (!IsFpReg(rl_result.low_reg)) {
+          // Can do this directly into the result register.
+          OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg);
+          StoreFinalValue(rl_dest, rl_result);
+          return;
+        } else {
+          rl_lhs = LoadValue(rl_lhs, kCoreReg);
+          rl_result = EvalLoc(rl_dest, kCoreReg, true);
+          OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+        }
+      } else {
+        // Try to use reg/memory instructions.
+        rl_lhs = UpdateLoc(rl_lhs);
+        rl_rhs = UpdateLoc(rl_rhs);
+        // We can't optimize with FP registers.
+        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
+          // An operand is in an FP register, so fall back to the standard three-register case.
+          rl_lhs = LoadValue(rl_lhs, kCoreReg);
+          rl_rhs = LoadValue(rl_rhs, kCoreReg);
+          rl_result = EvalLoc(rl_dest, kCoreReg, true);
+          OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+        } else {
+          // We can optimize by moving to result and using memory operands.
+          if (rl_rhs.location != kLocPhysReg) {
+            // Force LHS into result.
+            rl_result = EvalLoc(rl_dest, kCoreReg, true);
+            LoadValueDirect(rl_lhs, rl_result.low_reg);
+            OpRegMem(op, rl_result.low_reg, rl_rhs);
+          } else if (rl_lhs.location != kLocPhysReg) {
+            // RHS is in a register; LHS is in memory.
+            if (op != kOpSub) {
+              // Force RHS into result and operate on memory.
+              rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              OpRegCopy(rl_result.low_reg, rl_rhs.low_reg);
+              OpRegMem(op, rl_result.low_reg, rl_lhs);
+            } else {
+              // Subtraction isn't commutative.
+              rl_lhs = LoadValue(rl_lhs, kCoreReg);
+              rl_rhs = LoadValue(rl_rhs, kCoreReg);
+              rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+            }
+          } else {
+            // Both are in registers.
+            rl_lhs = LoadValue(rl_lhs, kCoreReg);
+            rl_rhs = LoadValue(rl_rhs, kCoreReg);
+            rl_result = EvalLoc(rl_dest, kCoreReg, true);
+            OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+          }
+        }
+      }
+    }
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
+  // If either operand is in an FP (non-core) register, we can't use the reg/mem forms.
+  if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.low_reg)) {
+    return false;
+  }
+  if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.low_reg)) {
+    return false;
+  }
+
+  // Everything will be fine :-).
+  return true;
+}
 }  // namespace art
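The arithmetic lowering code above boils down to one decision: can the operation be emitted in x86's two-address (or reg/mem) form? Below is a standalone sketch of just that rule, under the assumption spelled out in the code that an explicit *_2ADDR opcode, or a destination vreg that aliases the LHS vreg, enables the two-address path. The names (ArithForm, ChooseForm) are invented for illustration and are not ART APIs.

// Standalone sketch (not ART code): illustrates the two-address decision rule used
// above. An explicit *_2ADDR opcode, or a destination vreg that aliases the LHS
// vreg, lets the x86 back end emit a reg/mem or reg/reg form instead of the generic
// three-register sequence.
#include <cstdio>

enum class ArithForm { kThreeAddress, kTwoAddress };

// Hypothetical helper: mirrors "is_two_addr || (dest vreg == lhs vreg)".
ArithForm ChooseForm(bool is_2addr_opcode, int dest_vreg, int lhs_vreg) {
  if (is_2addr_opcode || dest_vreg == lhs_vreg) {
    return ArithForm::kTwoAddress;
  }
  return ArithForm::kThreeAddress;
}

int main() {
  // ADD_INT v0, v0, v1: dest aliases LHS, so the two-address form applies.
  printf("%s\n", ChooseForm(false, 0, 0) == ArithForm::kTwoAddress ? "two-address"
                                                                   : "three-address");
  // ADD_INT v2, v0, v1: distinct dest, keep the three-register sequence.
  printf("%s\n", ChooseForm(false, 2, 0) == ArithForm::kTwoAddress ? "two-address"
                                                                   : "three-address");
  return 0;
}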
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index f223548..a347d8b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -808,4 +808,12 @@
              << ", orig: " << loc.orig_sreg;
 }
 
+void X86Mir2Lir::Materialize() {
+  // Run the MIR analysis pass before starting code generation.
+  AnalyzeMIR();
+
+  // Now continue with regular code generation.
+  Mir2Lir::Materialize();
+}
+
 }  // namespace art
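The Materialize() override follows a simple shape: run the target-specific analysis first, then hand off to the shared Mir2Lir pipeline. A minimal sketch of that pattern, with invented class names, for readers skimming the patch:

// Minimal sketch of the override pattern used by X86Mir2Lir::Materialize(): run a
// target-specific analysis pass, then delegate to the generic base implementation.
// Class and method names below are illustrative only.
#include <iostream>

class CodeGen {
 public:
  virtual ~CodeGen() {}
  virtual void Materialize() { std::cout << "generic code generation\n"; }
};

class X86CodeGen : public CodeGen {
 public:
  void Materialize() override {
    Analyze();               // Target-specific pre-pass (AnalyzeMIR in the patch).
    CodeGen::Materialize();  // Then the shared pipeline runs unchanged.
  }

 private:
  void Analyze() { std::cout << "x86 MIR analysis\n"; }
};

int main() {
  X86CodeGen cg;
  cg.Materialize();
  return 0;
}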
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index bd38c03..e2744d0 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/dataflow_iterator-inl.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -61,7 +62,7 @@
 }
 
 bool X86Mir2Lir::InexpensiveConstantDouble(int64_t value) {
-  return false;  // TUNING
+  return value == 0;
 }
 
 /*
@@ -210,6 +211,110 @@
     return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* X86Mir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  DCHECK(!(X86_FPREG(r_base)));
+
+  X86OpCode opcode = kX86Nop;
+  switch (move_type) {
+    case kMov8GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov8RM;
+      break;
+    case kMov16GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov16RM;
+      break;
+    case kMov32GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov32RM;
+      break;
+    case kMov32FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovssRM;
+      break;
+    case kMov64FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovsdRM;
+      break;
+    case kMovU128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovupsRM;
+      break;
+    case kMovA128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovapsRM;
+      break;
+    case kMovLo128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovlpsRM;
+      break;
+    case kMovHi128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovhpsRM;
+      break;
+    case kMov64GP:
+    case kMovLo64FP:
+    case kMovHi64FP:
+    default:
+      LOG(FATAL) << "Bad case in OpMovRegMem";
+      break;
+  }
+
+  return NewLIR3(opcode, r_dest, r_base, offset);
+}
+
+LIR* X86Mir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  DCHECK(!(X86_FPREG(r_base)));
+
+  X86OpCode opcode = kX86Nop;
+  switch (move_type) {
+    case kMov8GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov8MR;
+      break;
+    case kMov16GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov16MR;
+      break;
+    case kMov32GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov32MR;
+      break;
+    case kMov32FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovssMR;
+      break;
+    case kMov64FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovsdMR;
+      break;
+    case kMovU128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovupsMR;
+      break;
+    case kMovA128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovapsMR;
+      break;
+    case kMovLo128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovlpsMR;
+      break;
+    case kMovHi128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovhpsMR;
+      break;
+    case kMov64GP:
+    case kMovLo64FP:
+    case kMovHi64FP:
+    default:
+      LOG(FATAL) << "Bad case in OpMovMemReg";
+      break;
+  }
+
+  return NewLIR3(opcode, r_base, offset, r_src);
+}
+
 LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   // The only conditional reg to reg operation supported is Cmov
   DCHECK_EQ(op, kOpCmov);
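OpMovRegMem and OpMovMemReg are essentially a table lookup from the new MoveType values to SSE move opcodes, with the direction (register-from-memory vs. memory-from-register) picking the RM or MR encoding. The standalone sketch below restates that mapping in plain C++; the enum and helper are illustrative stand-ins, not the ART definitions.

// Illustrative sketch (not ART code) of the MoveType -> SSE mnemonic mapping that
// OpMovRegMem/OpMovMemReg implement above: the move type selects the instruction,
// and the direction (load vs. store) selects the RM vs. MR encoding.
#include <cstdio>

enum MoveKind { kMove32FP, kMove64FP, kMoveU128FP, kMoveA128FP };

const char* MnemonicFor(MoveKind kind, bool is_load) {
  switch (kind) {
    case kMove32FP:   return is_load ? "movss xmm, m32"   : "movss m32, xmm";
    case kMove64FP:   return is_load ? "movsd xmm, m64"   : "movsd m64, xmm";
    case kMoveU128FP: return is_load ? "movups xmm, m128" : "movups m128, xmm";
    case kMoveA128FP: return is_load ? "movaps xmm, m128" : "movaps m128, xmm";
  }
  return "unknown";
}

int main() {
  printf("%s\n", MnemonicFor(kMoveA128FP, /*is_load=*/true));   // aligned 128-bit load
  printf("%s\n", MnemonicFor(kMoveU128FP, /*is_load=*/false));  // unaligned 128-bit store
  return 0;
}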
@@ -236,7 +341,57 @@
       LOG(FATAL) << "Bad case in OpRegMem " << op;
       break;
   }
-  return NewLIR3(opcode, r_dest, rBase, offset);
+  LIR *l = NewLIR3(opcode, r_dest, rBase, offset);
+  if (rBase == rX86_SP) {
+    AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */);
+  }
+  return l;
+}
+
+LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) {
+  DCHECK_NE(rl_dest.location, kLocPhysReg);
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+    case kOpSub: opcode = kX86Sub32MR; break;
+    case kOpMov: opcode = kX86Mov32MR; break;
+    case kOpCmp: opcode = kX86Cmp32MR; break;
+    case kOpAdd: opcode = kX86Add32MR; break;
+    case kOpAnd: opcode = kX86And32MR; break;
+    case kOpOr:  opcode = kX86Or32MR; break;
+    case kOpXor: opcode = kX86Xor32MR; break;
+    case kOpLsl: opcode = kX86Sal32MC; break;
+    case kOpLsr: opcode = kX86Shr32MC; break;
+    case kOpAsr: opcode = kX86Sar32MC; break;
+    default:
+      LOG(FATAL) << "Bad case in OpMemReg " << op;
+      break;
+  }
+  LIR *l = NewLIR3(opcode, rX86_SP, displacement, r_value);
+  AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */);
+  return l;
+}
+
+LIR* X86Mir2Lir::OpRegMem(OpKind op, int r_dest, RegLocation rl_value) {
+  DCHECK_NE(rl_value.location, kLocPhysReg);
+  int displacement = SRegOffset(rl_value.s_reg_low);
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+    case kOpSub: opcode = kX86Sub32RM; break;
+    case kOpMov: opcode = kX86Mov32RM; break;
+    case kOpCmp: opcode = kX86Cmp32RM; break;
+    case kOpAdd: opcode = kX86Add32RM; break;
+    case kOpAnd: opcode = kX86And32RM; break;
+    case kOpOr:  opcode = kX86Or32RM; break;
+    case kOpXor: opcode = kX86Xor32RM; break;
+    case kOpMul: opcode = kX86Imul32RM; break;
+    default:
+      LOG(FATAL) << "Bad case in OpRegMem " << op;
+      break;
+  }
+  LIR *l = NewLIR3(opcode, r_dest, rX86_SP, displacement);
+  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
+  return l;
 }
 
 LIR* X86Mir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1,
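The new OpMemReg/OpRegMem helpers operate directly on a virtual register's home slot in the frame instead of emitting a load, an ALU op, and a store. The displacement >> 2 passed to AnnotateDalvikRegAccess suggests 4-byte vreg slots; the tiny sketch below makes that displacement-to-slot arithmetic explicit. It is illustrative only, and the 4-byte slot size is an assumption drawn from the shift above.

// Illustrative sketch, not ART code: map a frame displacement to the Dalvik vreg
// slot it touches, assuming 4-byte slots (which is what the displacement >> 2
// passed to AnnotateDalvikRegAccess above implies).
#include <cassert>
#include <cstdio>

constexpr int kVRegSlotBytes = 4;

int DisplacementToVRegSlot(int displacement_bytes) {
  assert(displacement_bytes % kVRegSlotBytes == 0);
  return displacement_bytes >> 2;  // Same shift as the annotation in OpMemReg/OpRegMem.
}

int main() {
  // A value homed 24 bytes into the frame lives in slot 6.
  printf("slot %d\n", DisplacementToVRegSlot(24));
  return 0;
}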
@@ -344,6 +499,27 @@
       DCHECK_EQ(r_dest_lo, r_dest_hi);
       if (value == 0) {
         return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
+      } else if (base_of_code_ != nullptr) {
+        // We will load the value from the literal area.
+        LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+        if (data_target == NULL) {
+          data_target = AddWideData(&literal_list_, val_lo, val_hi);
+        }
+
+        // Load the address of the start of the method.
+        RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+        rl_method = LoadValue(rl_method, kCoreReg);
+
+        // Load the value from the literal area.
+        // We don't yet know its offset, so use a placeholder displacement large enough
+        // to force a 4-byte encoding; the assembler will patch in the correct value
+        // later.
+        res = LoadBaseDisp(rl_method.low_reg, 256 /* bogus */, r_dest_lo, kDouble, INVALID_SREG);
+        res->target = data_target;
+        res->flags.fixup = kFixupLoad;
+        SetMemRefType(res, true, kLiteral);
+        // Redo after we assign target to ensure size is correct.
+        SetupResourceMasks(res);
       } else {
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
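The constant-double path above defers the real displacement: it looks the value up in (or appends it to) a wide-literal pool, emits the load with a deliberately bogus 256-byte displacement, and records a fixup so the assembler can patch the offset once layout is final. A stripped-down sketch of that scan-or-add-plus-fixup bookkeeping, with invented types (WideLiteral, PendingLoad) standing in for ART's LIR structures:

// Stripped-down sketch (invented names) of the literal-pool pattern used above:
// reuse an existing wide literal if present, otherwise append one, and record a
// fixup so the load's displacement can be patched once layout is final.
#include <cstdint>
#include <cstdio>
#include <vector>

struct WideLiteral { int32_t lo; int32_t hi; };
struct PendingLoad { size_t literal_index; int32_t bogus_disp; };

size_t ScanOrAddWide(std::vector<WideLiteral>* pool, int32_t lo, int32_t hi) {
  for (size_t i = 0; i < pool->size(); ++i) {
    if ((*pool)[i].lo == lo && (*pool)[i].hi == hi) {
      return i;  // Already in the pool; share it.
    }
  }
  pool->push_back({lo, hi});
  return pool->size() - 1;
}

int main() {
  std::vector<WideLiteral> pool;
  std::vector<PendingLoad> fixups;

  // Two uses of the same double constant share one pool entry.
  size_t a = ScanOrAddWide(&pool, 0x00000000, 0x3FF00000);  // 1.0 as lo/hi words.
  size_t b = ScanOrAddWide(&pool, 0x00000000, 0x3FF00000);
  fixups.push_back({a, 256});  // 256 is a placeholder displacement, as in the patch.
  fixups.push_back({b, 256});

  printf("pool entries: %zu, pending fixups: %zu\n", pool.size(), fixups.size());
  return 0;
}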
@@ -612,4 +788,121 @@
     return branch;
 }
 
+void X86Mir2Lir::AnalyzeMIR() {
+  cu_->NewTimingSplit("X86 MIR Analysis");
+  // Assume we don't need a pointer to the base of the code.
+  store_method_addr_ = false;
+
+  // Walk the MIR looking for interesting items.
+  PreOrderDfsIterator iter(mir_graph_);
+  BasicBlock* curr_bb = iter.Next();
+  while (curr_bb != NULL) {
+    AnalyzeBB(curr_bb);
+    curr_bb = iter.Next();
+  }
+
+  // Did we need a pointer to the method code?
+  if (store_method_addr_) {
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false);
+  } else {
+    base_of_code_ = nullptr;
+  }
+}
+
+void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) {
+  if (bb->block_type == kDead) {
+    // Ignore dead blocks.
+    return;
+  }
+
+  for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
+    int opcode = mir->dalvikInsn.opcode;
+    if (opcode >= kMirOpFirst) {
+      AnalyzeExtendedMIR(opcode, bb, mir);
+    } else {
+      AnalyzeMIR(opcode, bb, mir);
+    }
+  }
+}
+
+
+void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
+  switch (opcode) {
+    // Instructions referencing doubles.
+    case kMirOpFusedCmplDouble:
+    case kMirOpFusedCmpgDouble:
+      AnalyzeFPInstruction(opcode, bb, mir);
+      break;
+    default:
+      // Ignore the rest.
+      break;
+  }
+}
+
+void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
+  // What we are looking for:
+  //  - Do we need a pointer to the method code (used for packed switches and double literals)?
+
+  switch (opcode) {
+    // Instructions referencing doubles.
+    case Instruction::CMPL_DOUBLE:
+    case Instruction::CMPG_DOUBLE:
+    case Instruction::NEG_DOUBLE:
+    case Instruction::ADD_DOUBLE:
+    case Instruction::SUB_DOUBLE:
+    case Instruction::MUL_DOUBLE:
+    case Instruction::DIV_DOUBLE:
+    case Instruction::REM_DOUBLE:
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE_2ADDR:
+      AnalyzeFPInstruction(opcode, bb, mir);
+      break;
+    // Packed switches and array fills need a pointer to the base of the method.
+    case Instruction::FILL_ARRAY_DATA:
+    case Instruction::PACKED_SWITCH:
+      store_method_addr_ = true;
+      break;
+    default:
+      // Other instructions are not interesting yet.
+      break;
+  }
+}
+
+void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) {
+  // Look at all the uses, and see if they are double constants.
+  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  int next_sreg = 0;
+  if (attrs & DF_UA) {
+    if (attrs & DF_A_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      next_sreg += 2;
+    } else {
+      next_sreg++;
+    }
+  }
+  if (attrs & DF_UB) {
+    if (attrs & DF_B_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      next_sreg += 2;
+    } else {
+      next_sreg++;
+    }
+  }
+  if (attrs & DF_UC) {
+    if (attrs & DF_C_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+    }
+  }
+}
+
+void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
+  // If this is a double literal, we will want it in the literal pool.
+  if (use.is_const) {
+    store_method_addr_ = true;
+  }
+}
+
 }  // namespace art
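AnalyzeFPInstruction walks an instruction's uses by decoding data-flow attribute bits: each of the A/B/C use slots may be wide, and a wide use occupies two consecutive SSA registers, which is why next_sreg advances by two. The self-contained sketch below mimics that walk; the flag names only imitate ART's DF_UA/DF_A_WIDE attributes and are not the real constants.

// Self-contained sketch of the use-walk in AnalyzeFPInstruction: each use slot may
// be wide, and a wide use occupies two consecutive ssa registers. The kUseA/kWideA
// style flags below only mimic ART's DF_UA/DF_A_WIDE attributes.
#include <cstdint>
#include <cstdio>
#include <vector>

enum : uint32_t {
  kUseA = 1u << 0, kWideA = 1u << 1,
  kUseB = 1u << 2, kWideB = 1u << 3,
  kUseC = 1u << 4, kWideC = 1u << 5,
};

// Returns the starting index (into the instruction's use list) of each wide use.
std::vector<int> WideUseStarts(uint32_t attrs) {
  std::vector<int> starts;
  int next = 0;
  const uint32_t slots[][2] = {{kUseA, kWideA}, {kUseB, kWideB}, {kUseC, kWideC}};
  for (const auto& slot : slots) {
    if (attrs & slot[0]) {
      if (attrs & slot[1]) {
        starts.push_back(next);
        next += 2;  // A wide use consumes a pair of ssa regs.
      } else {
        next += 1;
      }
    }
  }
  return starts;
}

int main() {
  // Something like ADD_DOUBLE: two wide uses, starting at use indices 0 and 2.
  for (int start : WideUseStarts(kUseA | kWideA | kUseB | kWideB)) {
    printf("wide use starts at %d\n", start);
  }
  return 0;
}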
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 7f35d06..6962ff7 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -357,6 +357,14 @@
   kX86PsllqRI,                  // left shift of floating point registers
   kX86SqrtsdRR,                 // sqrt of floating point register
   kX86FstpdM,                   // Store and pop top x87 fp stack
+  Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
+  kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
+  Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
+  kX86MovapsMR, kX86MovapsAR,   // store aligned packed single FP values from xmm1 to m128
+  kX86MovlpsRM, kX86MovlpsRA,   // load packed single FP values from m64 to low quadword of xmm
+  kX86MovlpsMR, kX86MovlpsAR,   // store packed single FP values from low quadword of xmm to m64
+  kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
+  kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
   Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
   kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
   kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
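The opcode names added here follow the back end's existing suffix convention, which, as used elsewhere in this file, appears to be: RM loads a register from [base + disp], MR stores to it, and RA/AR are the same with an indexed [base + index*scale + disp] operand. The helper below is only a mnemonic for that convention, not ART code, and the suffix meanings are inferred from how the back end uses these opcodes.

// Quick-reference sketch of the operand-order suffixes used by the new opcodes.
// Purely illustrative; the suffix meanings are an inference, not an ART definition.
#include <cstdio>
#include <cstring>

const char* DescribeSuffix(const char* opcode_name) {
  size_t len = strlen(opcode_name);
  if (len < 2) return "unknown";
  const char* suffix = opcode_name + len - 2;
  if (strcmp(suffix, "RM") == 0) return "load: register <- [base + disp]";
  if (strcmp(suffix, "MR") == 0) return "store: [base + disp] <- register";
  if (strcmp(suffix, "RA") == 0) return "load: register <- [base + index*scale + disp]";
  if (strcmp(suffix, "AR") == 0) return "store: [base + index*scale + disp] <- register";
  return "unknown";
}

int main() {
  printf("kX86MovupsMR: %s\n", DescribeSuffix("kX86MovupsMR"));
  printf("kX86MovlpsRM: %s\n", DescribeSuffix("kX86MovlpsRM"));
  return 0;
}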
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 97df199..582a0e9 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -180,7 +180,11 @@
 
   ~Dex2Oat() {
     delete runtime_;
-    VLOG(compiler) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
+    LogCompletionTime();
+  }
+
+  void LogCompletionTime() {
+    LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
               << " (threads: " << thread_count_ << ")";
   }
 
@@ -1189,6 +1193,7 @@
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
   // destructors that take time (bug 10645725) unless we're a debug build or running on valgrind.
   if (!kIsDebugBuild || (RUNNING_ON_VALGRIND == 0)) {
+    dex2oat->LogCompletionTime();
     exit(EXIT_SUCCESS);
   }
 
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 6c25e0a..903d755 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -246,6 +246,42 @@
         load = *instr == 0x10;
         store = !load;
         break;
+      case 0x12: case 0x13:
+        if (prefix[2] == 0x66) {
+          opcode << "movlpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movlps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x12;
+        store = !load;
+        break;
+      case 0x16: case 0x17:
+        if (prefix[2] == 0x66) {
+          opcode << "movhpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movhps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x16;
+        store = !load;
+        break;
+      case 0x28: case 0x29:
+        if (prefix[2] == 0x66) {
+          opcode << "movapd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movaps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x28;
+        store = !load;
+        break;
       case 0x2A:
         if (prefix[2] == 0x66) {
           opcode << "cvtpi2pd";
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 462c328..f1f5905 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -649,15 +649,9 @@
 
 const OatFile* ClassLinker::RegisterOatFile(const OatFile* oat_file) {
   WriterMutexLock mu(Thread::Current(), dex_lock_);
-  for (size_t i = 0; i < oat_files_.size(); ++i) {
-    if (UNLIKELY(oat_file->GetLocation() == oat_files_[i]->GetLocation())) {
-      VLOG(class_linker) << "Attempt to register oat file that's already registered: "
-          << oat_file->GetLocation();
-      for (size_t j = i; j < oat_files_.size(); ++j) {
-        CHECK_NE(oat_file, oat_files_[j]) << "Attempt to re-register dex file.";
-      }
-      delete oat_file;
-      return oat_files_[i];
+  if (kIsDebugBuild) {
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      CHECK_NE(oat_file, oat_files_[i]) << oat_file->GetLocation();
     }
   }
   VLOG(class_linker) << "Registering " << oat_file->GetLocation();
@@ -826,20 +820,6 @@
       << oat_location << "': " << *error_msg;
   error_msg->clear();
 
-  {
-    // We might have registered an outdated OatFile in FindDexFileInOatLocation().
-    // Get rid of it as its MAP_PRIVATE mapping may not reflect changes we're about to do.
-    WriterMutexLock mu(Thread::Current(), dex_lock_);
-    for (size_t i = 0; i < oat_files_.size(); ++i) {
-      if (oat_location == oat_files_[i]->GetLocation()) {
-        VLOG(class_linker) << "De-registering old OatFile: " << oat_location;
-        delete oat_files_[i];
-        oat_files_.erase(oat_files_.begin() + i);
-        break;
-      }
-    }
-  }
-
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
   if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, error_msg)) {
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 429c516..d28d986 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -284,6 +284,27 @@
   }
 }
 
+DexFile::DexFile(const byte* base, size_t size,
+                 const std::string& location,
+                 uint32_t location_checksum,
+                 MemMap* mem_map)
+    : begin_(base),
+      size_(size),
+      location_(location),
+      location_checksum_(location_checksum),
+      mem_map_(mem_map),
+      modification_lock("DEX modification lock"),
+      header_(reinterpret_cast<const Header*>(base)),
+      string_ids_(reinterpret_cast<const StringId*>(base + header_->string_ids_off_)),
+      type_ids_(reinterpret_cast<const TypeId*>(base + header_->type_ids_off_)),
+      field_ids_(reinterpret_cast<const FieldId*>(base + header_->field_ids_off_)),
+      method_ids_(reinterpret_cast<const MethodId*>(base + header_->method_ids_off_)),
+      proto_ids_(reinterpret_cast<const ProtoId*>(base + header_->proto_ids_off_)),
+      class_defs_(reinterpret_cast<const ClassDef*>(base + header_->class_defs_off_)) {
+  CHECK(begin_ != NULL) << GetLocation();
+  CHECK_GT(size_, 0U) << GetLocation();
+}
+
 DexFile::~DexFile() {
   // We don't call DeleteGlobalRef on dex_object_ because we're only called by DestroyJavaVM, and
   // that's only called after DetachCurrentThread, which means there's no JNIEnv. We could
@@ -292,25 +313,12 @@
 }
 
 bool DexFile::Init(std::string* error_msg) {
-  InitMembers();
   if (!CheckMagicAndVersion(error_msg)) {
     return false;
   }
   return true;
 }
 
-void DexFile::InitMembers() {
-  const byte* b = begin_;
-  header_ = reinterpret_cast<const Header*>(b);
-  const Header* h = header_;
-  string_ids_ = reinterpret_cast<const StringId*>(b + h->string_ids_off_);
-  type_ids_ = reinterpret_cast<const TypeId*>(b + h->type_ids_off_);
-  field_ids_ = reinterpret_cast<const FieldId*>(b + h->field_ids_off_);
-  method_ids_ = reinterpret_cast<const MethodId*>(b + h->method_ids_off_);
-  proto_ids_ = reinterpret_cast<const ProtoId*>(b + h->proto_ids_off_);
-  class_defs_ = reinterpret_cast<const ClassDef*>(b + h->class_defs_off_);
-}
-
 bool DexFile::CheckMagicAndVersion(std::string* error_msg) const {
   CHECK(header_->magic_ != NULL) << GetLocation();
   if (!IsMagicValid(header_->magic_)) {
@@ -856,6 +864,13 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) {
+  os << StringPrintf("[DexFile: %s dex-checksum=%08x location-checksum=%08x %p-%p]",
+                     dex_file.GetLocation().c_str(),
+                     dex_file.GetHeader().checksum_, dex_file.GetLocationChecksum(),
+                     dex_file.Begin(), dex_file.Begin() + dex_file.Size());
+  return os;
+}
 std::string Signature::ToString() const {
   if (dex_file_ == nullptr) {
     CHECK(proto_id_ == nullptr);
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 69593cd..bc2bfde 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -849,30 +849,11 @@
   DexFile(const byte* base, size_t size,
           const std::string& location,
           uint32_t location_checksum,
-          MemMap* mem_map)
-      : begin_(base),
-        size_(size),
-        location_(location),
-        location_checksum_(location_checksum),
-        mem_map_(mem_map),
-        modification_lock("DEX modification lock"),
-        header_(0),
-        string_ids_(0),
-        type_ids_(0),
-        field_ids_(0),
-        method_ids_(0),
-        proto_ids_(0),
-        class_defs_(0) {
-    CHECK(begin_ != NULL) << GetLocation();
-    CHECK_GT(size_, 0U) << GetLocation();
-  }
+          MemMap* mem_map);
 
   // Top-level initializer that calls other Init methods.
   bool Init(std::string* error_msg);
 
-  // Caches pointers into to the various file sections.
-  void InitMembers();
-
   // Returns true if the header magic and version numbers are of the expected values.
   bool CheckMagicAndVersion(std::string* error_msg) const;
 
@@ -903,26 +884,27 @@
   Mutex modification_lock;
 
   // Points to the header section.
-  const Header* header_;
+  const Header* const header_;
 
   // Points to the base of the string identifier list.
-  const StringId* string_ids_;
+  const StringId* const string_ids_;
 
   // Points to the base of the type identifier list.
-  const TypeId* type_ids_;
+  const TypeId* const type_ids_;
 
   // Points to the base of the field identifier list.
-  const FieldId* field_ids_;
+  const FieldId* const field_ids_;
 
   // Points to the base of the method identifier list.
-  const MethodId* method_ids_;
+  const MethodId* const method_ids_;
 
   // Points to the base of the prototype identifier list.
-  const ProtoId* proto_ids_;
+  const ProtoId* const proto_ids_;
 
   // Points to the base of the class definition list.
-  const ClassDef* class_defs_;
+  const ClassDef* const class_defs_;
 };
+std::ostream& operator<<(std::ostream& os, const DexFile& dex_file);
 
 // Iterate over a dex file's ProtoId's paramters
 class DexFileParameterIterator {
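Moving the DexFile constructor body into dex_file.cc is what allows the section pointers to become const: const data members can only be set in the constructor's initializer list, so the separate InitMembers() pass had to go. A minimal sketch of that C++ constraint with invented types:

// Minimal sketch of why InitMembers() was removed: const pointer members must be
// set in the constructor's initializer list, so caching the section pointers there
// lets them be declared `const Header* const` and so on. Types are illustrative.
#include <cassert>
#include <cstdint>

struct Header { uint32_t string_ids_off_; };

class FileView {
 public:
  explicit FileView(const uint8_t* base)
      : begin_(base),
        header_(reinterpret_cast<const Header*>(base)),
        string_ids_(base + header_->string_ids_off_) {  // Derived from an earlier member.
    assert(begin_ != nullptr);
  }

 private:
  const uint8_t* const begin_;
  const Header* const header_;    // Could not be const if filled in later by an Init() pass.
  const uint8_t* const string_ids_;
};

int main() {
  alignas(Header) uint8_t buffer[64] = {};
  reinterpret_cast<Header*>(buffer)->string_ids_off_ = 16;
  FileView view(buffer);
  (void)view;
  return 0;
}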
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 0a3e1a1..2795e1d 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -120,7 +120,7 @@
   CHECK_NE(0, prot);
   CHECK_NE(0, flags & (MAP_SHARED | MAP_PRIVATE));
   if (byte_count == 0) {
-    return new MemMap("file", NULL, 0, NULL, 0, prot);
+    return new MemMap(filename, NULL, 0, NULL, 0, prot);
   }
   // Adjust 'offset' to be page-aligned as required by mmap.
   int page_offset = start % kPageSize;
@@ -153,7 +153,7 @@
                               maps.c_str());
     return NULL;
   }
-  return new MemMap("file", actual + page_offset, byte_count, actual, page_aligned_byte_count,
+  return new MemMap(filename, actual + page_offset, byte_count, actual, page_aligned_byte_count,
                     prot);
 }
 
@@ -267,4 +267,11 @@
   return false;
 }
 
+std::ostream& operator<<(std::ostream& os, const MemMap& mem_map) {
+  os << StringPrintf("[MemMap: %s prot=%x %p-%p]",
+                     mem_map.GetName().c_str(), mem_map.GetProtect(),
+                     mem_map.BaseBegin(), mem_map.BaseEnd());
+  return os;
+}
+
 }  // namespace art
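With operator<< declared next to the class, call sites can stream a MemMap (or DexFile) straight into a log line. The sketch below shows the same non-member streaming pattern on a simplified stand-in type; MiniMap is invented for the example.

// Standalone sketch of the operator<< pattern added for MemMap/DexFile: a non-member
// streaming operator declared next to the class so call sites can log the object
// directly. MiniMap is a stand-in, not the real MemMap.
#include <cstddef>
#include <iostream>
#include <string>

class MiniMap {
 public:
  MiniMap(std::string name, void* begin, size_t size)
      : name_(std::move(name)), begin_(begin), size_(size) {}
  const std::string& GetName() const { return name_; }
  void* Begin() const { return begin_; }
  size_t Size() const { return size_; }

 private:
  std::string name_;
  void* begin_;
  size_t size_;
};

std::ostream& operator<<(std::ostream& os, const MiniMap& map) {
  os << "[MiniMap: " << map.GetName() << " " << map.Begin() << "-"
     << static_cast<void*>(static_cast<char*>(map.Begin()) + map.Size()) << "]";
  return os;
}

int main() {
  char backing[128];
  MiniMap map("scratch", backing, sizeof(backing));
  std::cout << map << std::endl;  // Call sites can now stream the object directly.
  return 0;
}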
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 2c65833..d2059b5 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -62,6 +62,10 @@
   // Releases the memory mapping
   ~MemMap();
 
+  const std::string& GetName() const {
+    return name_;
+  }
+
   bool Protect(int prot);
 
   int GetProtect() const {
@@ -80,6 +84,18 @@
     return Begin() + Size();
   }
 
+  void* BaseBegin() const {
+    return base_begin_;
+  }
+
+  size_t BaseSize() const {
+    return base_size_;
+  }
+
+  void* BaseEnd() const {
+    return reinterpret_cast<byte*>(BaseBegin()) + BaseSize();
+  }
+
   bool HasAddress(const void* addr) const {
     return Begin() <= addr && addr < End();
   }
@@ -102,6 +118,7 @@
 
   friend class MemMapTest;  // To allow access to base_begin_ and base_size_.
 };
+std::ostream& operator<<(std::ostream& os, const MemMap& mem_map);
 
 }  // namespace art
 
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index fa2b485..be882048 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -338,12 +338,17 @@
   }
 
   if (warn_if_not_found) {
+    std::string checksum("<unspecified>");
+    if (dex_location_checksum != NULL) {
+      checksum = StringPrintf("0x%08x", *dex_location_checksum);
+    }
     LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location
-                 << " in OatFile " << GetLocation();
+                 << " with checksum " << checksum << " in OatFile " << GetLocation();
     if (kIsDebugBuild) {
       for (Table::const_iterator it = oat_dex_files_.begin(); it != oat_dex_files_.end(); ++it) {
         LOG(WARNING) << "OatFile " << GetLocation()
-                     << " contains OatDexFile " << it->second->GetDexFileLocation();
+                     << " contains OatDexFile " << it->second->GetDexFileLocation()
+                     << " with checksum 0x" << std::hex << it->second->GetDexFileLocationChecksum();
       }
     }
   }