Improve GenConstString, GenS{get,put} for x86

Rewrite GenConstString for x86 to skip calling ResolveString when the
string is already resolved.  Also try to avoid a register copy if the
Method* is in a promoted register.

Implement the TODO for GenS{get,put} to use compare to memory for x86 by
adding a new codegen function to compare directly to memory.  Implement
a default implementation that uses a temporary register for RISC
architectures.

Change-Id: Ie163cca3d3d841aa10c50dc6592ec30af7a7cbc9
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 1eb79c9..d938af2 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1161,4 +1161,12 @@
   return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
 }
 
+LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                int offset, int check_value, LIR* target) {
+  // Generic fallback for targets that can't compare memory to an immediate directly.
+  LoadWordDisp(base_reg, offset, temp_reg);  // Load [base_reg + offset] into temp_reg.
+  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
+  return branch;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index d8b9869..44d671de 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -375,10 +375,9 @@
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* initialized_branch = OpCmpMemImmBranch(kCondGe, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
 
         LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
         unresolved_branch->target = unresolved_target;
@@ -469,10 +468,9 @@
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* initialized_branch = OpCmpMemImmBranch(kCondGe, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
 
         LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
         unresolved_branch->target = unresolved_target;
@@ -831,27 +829,43 @@
     // slow path, resolve string if not in dex cache
     FlushAllRegs();
     LockCallTemps();  // Using explicit registers
-    LoadCurrMethodDirect(TargetReg(kArg2));
-    LoadWordDisp(TargetReg(kArg2),
-                 mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), TargetReg(kArg0));
+
+    // If the Method* is already in a register, we can save a copy.
+    RegLocation rl_method = mir_graph_->GetMethodLoc();
+    int r_method;
+    if (rl_method.location == kLocPhysReg) {
+      // A temp would conflict with register use below.
+      DCHECK(!IsTemp(rl_method.low_reg));
+      r_method = rl_method.low_reg;
+    } else {
+      r_method = TargetReg(kArg2);
+      LoadCurrMethodDirect(r_method);
+    }
+    LoadWordDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
+                 TargetReg(kArg0));
+
     // Might call out to helper, which will return resolved string in kRet0
     int r_tgt = CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
     LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
-    LoadConstant(TargetReg(kArg1), string_idx);
     if (cu_->instruction_set == kThumb2) {
+      LoadConstant(TargetReg(kArg1), string_idx);
       OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
       GenBarrier();
       // For testing, always force through helper
       if (!EXERCISE_SLOWEST_STRING_PATH) {
         OpIT(kCondEq, "T");
       }
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
+      // The copy MUST generate exactly one instruction (for OpIT).
+      DCHECK_NE(TargetReg(kArg0), r_method);
+      OpRegCopy(TargetReg(kArg0), r_method);   // .eq
+
       LIR* call_inst = OpReg(kOpBlx, r_tgt);    // .eq, helper(Method*, string_idx)
       MarkSafepointPC(call_inst);
       FreeTemp(r_tgt);
     } else if (cu_->instruction_set == kMips) {
       LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
+      LoadConstant(TargetReg(kArg1), string_idx);
+      OpRegCopy(TargetReg(kArg0), r_method);   // .eq
       LIR* call_inst = OpReg(kOpBlx, r_tgt);
       MarkSafepointPC(call_inst);
       FreeTemp(r_tgt);
@@ -859,8 +873,12 @@
       branch->target = target;
     } else {
       DCHECK_EQ(cu_->instruction_set, kX86);
-      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), TargetReg(kArg2),
+      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
+      LoadConstant(TargetReg(kArg1), string_idx);
+      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), r_method,
                               TargetReg(kArg1), true);
+      LIR* target = NewLIR0(kPseudoTargetLabel);
+      branch->target = target;
     }
     GenBarrier();
     StoreValue(rl_dest, GetReturn(false));
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 10136b6..c67a129 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -647,7 +647,19 @@
     void SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
-
+    // Routines that work for the generic case, but may be overridden by target.
+    /*
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register that can be used if compare to memory is not
+     * supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     * @returns The branch instruction that was generated.
+     */
+    virtual LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target);
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6280b64..b46c324 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -365,6 +365,18 @@
      * @param val Constant multiplier.
      */
     void GenImulMemImm(int dest, int sreg, int displacement, int val);
+
+    /*
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register for targets that cannot compare to memory;
+     * not used by the x86 implementation.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     */
+    LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                           int offset, int check_value, LIR* target);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 97c04dc..bd38c03 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -604,4 +604,12 @@
   FreeTemp(tmp_reg);
 }
 
+LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target) {
+    NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg, offset,
+            check_value);  // Compare [base_reg + offset] with check_value; temp_reg is unused.
+    LIR* branch = OpCondBranch(cond, target);  // Branch on the result of the compare above.
+    return branch;
+}
+
 }  // namespace art