[optimizing compiler] x86 goodness

Implement the x86 version of
https://android-review.googlesource.com/#/c/129560/, which made some
enhancements to x86_64 code.
- Use leal to implement 3 operand adds
- Use testl rather than cmpl to 0 for registers
- Use leaq for x86_64 for adds with constant in int32_t range

Note:
- The range and register allocator tests seem quite fragile.  I had to
  change ADD_INT_LIT8 to XOR_INT_LIT8 for the register allocator test to
  get the code to run.  It seems like this is a bit hard-coded to
  expected code generation sequences.  I also changes BuildTwoAdds to
  BuildTwoSubs for the same reason.
- For the live range test, I just changed the expected output, as the
  Locations were different.

Change-Id: I402f2e95ddc8be4eb0befb3dae1b29feadfa29ab
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7b35cfd..116dd15 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -819,7 +819,7 @@
         // Materialized condition, compare against 0.
         Location lhs = if_instr->GetLocations()->InAt(0);
         if (lhs.IsRegister()) {
-          __ cmpl(lhs.AsRegister<Register>(), Immediate(0));
+          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
         } else {
           __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
         }
@@ -836,9 +836,12 @@
       if (rhs.IsRegister()) {
         __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
       } else if (rhs.IsConstant()) {
-        HIntConstant* instruction = rhs.GetConstant()->AsIntConstant();
-        Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ cmpl(lhs.AsRegister<Register>(), imm);
+        int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue();
+        if (constant == 0) {
+          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
+        } else {
+          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
+        }
       } else {
         __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
@@ -914,16 +917,19 @@
     Register reg = locations->Out().AsRegister<Register>();
     // Clear register: setcc only sets the low byte.
     __ xorl(reg, reg);
-    if (locations->InAt(1).IsRegister()) {
-      __ cmpl(locations->InAt(0).AsRegister<Register>(),
-              locations->InAt(1).AsRegister<Register>());
-    } else if (locations->InAt(1).IsConstant()) {
-      HConstant* instruction = locations->InAt(1).GetConstant();
-      Immediate imm(CodeGenerator::GetInt32ValueOf(instruction));
-      __ cmpl(locations->InAt(0).AsRegister<Register>(), imm);
+    Location lhs = locations->InAt(0);
+    Location rhs = locations->InAt(1);
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
+    } else if (rhs.IsConstant()) {
+      int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue();
+      if (constant == 0) {
+        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
+      } else {
+      __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
+      }
     } else {
-      __ cmpl(locations->InAt(0).AsRegister<Register>(),
-              Address(ESP, locations->InAt(1).GetStackIndex()));
+      __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
     }
     __ setb(X86Condition(comp->GetCondition()), reg);
   }
@@ -1798,7 +1804,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
   switch (add->GetResultType()) {
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
@@ -1824,15 +1836,26 @@
   LocationSummary* locations = add->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
+  Location out = locations->Out();
+
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ addl(first.AsRegister<Register>(), second.AsRegister<Register>());
+        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+          __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
+        } else {
+          __ leal(out.AsRegister<Register>(), Address(
+              first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
+          }
       } else if (second.IsConstant()) {
-        __ addl(first.AsRegister<Register>(),
-                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+        int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
+        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+          __ addl(out.AsRegister<Register>(), Immediate(value));
+        } else {
+          __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
+        }
       } else {
+        DCHECK(first.Equals(locations->Out()));
         __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
@@ -3502,12 +3525,16 @@
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
-      Immediate imm(CodeGenerator::GetInt32ValueOf(constant));
+      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
       if (destination.IsRegister()) {
-        __ movl(destination.AsRegister<Register>(), imm);
+        if (value == 0) {
+          __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
+        } else {
+          __ movl(destination.AsRegister<Register>(), Immediate(value));
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
-        __ movl(Address(ESP, destination.GetStackIndex()), imm);
+        __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
       }
     } else {
       DCHECK(constant->IsFloatConstant());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 74adb31..adc022a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1868,8 +1868,19 @@
 
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
+      // We can use a leaq or addq if the constant can fit in an immediate.
+      HInstruction* rhs = add->InputAt(1);
+      bool is_int32_constant = false;
+      if (rhs->IsLongConstant()) {
+        int64_t value = rhs->AsLongConstant()->GetValue();
+        if (static_cast<int32_t>(value) == value) {
+          is_int32_constant = true;
+        }
+      }
+      locations->SetInAt(1,
+          is_int32_constant ? Location::RegisterOrConstant(rhs) :
+                              Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
 
@@ -1917,7 +1928,25 @@
     }
 
     case Primitive::kPrimLong: {
-      __ addq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsRegister()) {
+        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+          __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+        } else {
+          __ leaq(out.AsRegister<CpuRegister>(), Address(
+              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
+        }
+      } else {
+        DCHECK(second.IsConstant());
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        int32_t int32_value = Low32Bits(value);
+        DCHECK_EQ(int32_value, value);
+        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+          __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
+        } else {
+          __ leaq(out.AsRegister<CpuRegister>(), Address(
+              first.AsRegister<CpuRegister>(), int32_value));
+        }
+      }
       break;
     }
 
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 17914e8..c102c4f 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -399,11 +399,11 @@
   LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
-  ASSERT_EQ(16u, range->GetEnd());
+  ASSERT_EQ(17u, range->GetEnd());
   range = range->GetNext();
   ASSERT_TRUE(range != nullptr);
   ASSERT_EQ(20u, range->GetStart());
-  ASSERT_EQ(22u, range->GetEnd());
+  ASSERT_EQ(23u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 4 constant.
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index e5d06a9..b757a3b 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -322,9 +322,9 @@
 TEST(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
-    Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8,
-    Instruction::ADD_INT_LIT8 | 0 << 8, 1 << 8,
-    Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8 | 1,
+    Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8,
+    Instruction::XOR_INT_LIT8 | 0 << 8, 1 << 8,
+    Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8 | 1,
     Instruction::RETURN_VOID);
 
   ArenaPool pool;
@@ -334,27 +334,27 @@
   SsaLivenessAnalysis liveness(*graph, &codegen);
   liveness.Analyze();
 
-  HAdd* first_add = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsAdd();
-  HAdd* last_add = graph->GetBlocks().Get(1)->GetLastInstruction()->GetPrevious()->AsAdd();
-  ASSERT_EQ(last_add->InputAt(0), first_add);
-  LiveInterval* interval = first_add->GetLiveInterval();
-  ASSERT_EQ(interval->GetEnd(), last_add->GetLifetimePosition());
+  HXor* first_xor = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsXor();
+  HXor* last_xor = graph->GetBlocks().Get(1)->GetLastInstruction()->GetPrevious()->AsXor();
+  ASSERT_EQ(last_xor->InputAt(0), first_xor);
+  LiveInterval* interval = first_xor->GetLiveInterval();
+  ASSERT_EQ(interval->GetEnd(), last_xor->GetLifetimePosition());
   ASSERT_TRUE(interval->GetNextSibling() == nullptr);
 
   // We need a register for the output of the instruction.
-  ASSERT_EQ(interval->FirstRegisterUse(), first_add->GetLifetimePosition());
+  ASSERT_EQ(interval->FirstRegisterUse(), first_xor->GetLifetimePosition());
 
   // Split at the next instruction.
-  interval = interval->SplitAt(first_add->GetLifetimePosition() + 2);
+  interval = interval->SplitAt(first_xor->GetLifetimePosition() + 2);
   // The user of the split is the last add.
-  ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition());
+  ASSERT_EQ(interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 
   // Split before the last add.
-  LiveInterval* new_interval = interval->SplitAt(last_add->GetLifetimePosition() - 1);
+  LiveInterval* new_interval = interval->SplitAt(last_xor->GetLifetimePosition() - 1);
   // Ensure the current interval has no register use...
   ASSERT_EQ(interval->FirstRegisterUse(), kNoLifetime);
   // And the new interval has it for the last add.
-  ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition());
+  ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
 TEST(RegisterAllocatorTest, DeadPhi) {
@@ -634,9 +634,9 @@
   }
 }
 
-static HGraph* BuildTwoAdds(ArenaAllocator* allocator,
-                            HInstruction** first_add,
-                            HInstruction** second_add) {
+static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
+                            HInstruction** first_sub,
+                            HInstruction** second_sub) {
   HGraph* graph = new (allocator) HGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
@@ -652,10 +652,10 @@
   graph->AddBlock(block);
   entry->AddSuccessor(block);
 
-  *first_add = new (allocator) HAdd(Primitive::kPrimInt, parameter, constant1);
-  block->AddInstruction(*first_add);
-  *second_add = new (allocator) HAdd(Primitive::kPrimInt, *first_add, constant2);
-  block->AddInstruction(*second_add);
+  *first_sub = new (allocator) HSub(Primitive::kPrimInt, parameter, constant1);
+  block->AddInstruction(*first_sub);
+  *second_sub = new (allocator) HSub(Primitive::kPrimInt, *first_sub, constant2);
+  block->AddInstruction(*second_sub);
 
   block->AddInstruction(new (allocator) HExit());
   return graph;
@@ -664,10 +664,10 @@
 TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HInstruction *first_add, *second_add;
+  HInstruction *first_sub, *second_sub;
 
   {
-    HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add);
+    HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
     x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
     SsaLivenessAnalysis liveness(*graph, &codegen);
     liveness.Analyze();
@@ -676,27 +676,27 @@
     register_allocator.AllocateRegisters();
 
     // Sanity check that in normal conditions, the registers are the same.
-    ASSERT_EQ(first_add->GetLiveInterval()->GetRegister(), 1);
-    ASSERT_EQ(second_add->GetLiveInterval()->GetRegister(), 1);
+    ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
+    ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 1);
   }
 
   {
-    HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add);
+    HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
     x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
     SsaLivenessAnalysis liveness(*graph, &codegen);
     liveness.Analyze();
 
     // check that both adds get the same register.
     // Don't use UpdateOutput because output is already allocated.
-    first_add->InputAt(0)->GetLocations()->output_ = Location::RegisterLocation(2);
-    ASSERT_EQ(first_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
-    ASSERT_EQ(second_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
+    first_sub->InputAt(0)->GetLocations()->output_ = Location::RegisterLocation(2);
+    ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
+    ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
 
     RegisterAllocator register_allocator(&allocator, &codegen, liveness);
     register_allocator.AllocateRegisters();
 
-    ASSERT_EQ(first_add->GetLiveInterval()->GetRegister(), 2);
-    ASSERT_EQ(second_add->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
   }
 }