X86: Assembler support for near labels

The optimizing compiler uses 32 bit relative jumps for all forward
jumps, just in case the offset is too large to fit in one byte.  Some of
the generated code knows that the jumps will in fact fit.

Add a 'NearLabel' class to the x86 and x86_64 assemblers.  This will be
used to generate known short forward branches.

Add jecxz/jrcxz instructions, which only handle a short offset.  They
will be used for intrinsics.

Add tests for the new instructions and NearLabel.

Change-Id: I11177f36394d35d63b32364b0e6289ee6d97de46
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 3097cd5..64d76b8 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -53,9 +53,11 @@
 }
 namespace x86 {
   class X86Assembler;
+  class NearLabel;
 }
 namespace x86_64 {
   class X86_64Assembler;
+  class NearLabel;
 }
 
 class ExternalLabel {
@@ -126,7 +128,9 @@
   friend class mips::MipsAssembler;
   friend class mips64::Mips64Assembler;
   friend class x86::X86Assembler;
+  friend class x86::NearLabel;
   friend class x86_64::X86_64Assembler;
+  friend class x86_64::NearLabel;
 
   DISALLOW_COPY_AND_ASSIGN(Label);
 };
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 9b3d792..a03f857 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1510,6 +1510,38 @@
 }
 
 
+void X86Assembler::j(Condition condition, NearLabel* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    CHECK(IsInt<8>(offset - kShortSize));
+    EmitUint8(0x70 + condition);
+    EmitUint8((offset - kShortSize) & 0xFF);
+  } else {
+    EmitUint8(0x70 + condition);
+    EmitLabelLink(label);
+  }
+}
+
+
+void X86Assembler::jecxz(NearLabel* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    CHECK(IsInt<8>(offset - kShortSize));
+    EmitUint8(0xE3);
+    EmitUint8((offset - kShortSize) & 0xFF);
+  } else {
+    EmitUint8(0xE3);
+    EmitLabelLink(label);
+  }
+}
+
+
 void X86Assembler::jmp(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xFF);
@@ -1543,6 +1575,22 @@
 }
 
 
+void X86Assembler::jmp(NearLabel* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    CHECK(IsInt<8>(offset - kShortSize));
+    EmitUint8(0xEB);
+    EmitUint8((offset - kShortSize) & 0xFF);
+  } else {
+    EmitUint8(0xEB);
+    EmitLabelLink(label);
+  }
+}
+
+
 void X86Assembler::repne_scasw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1675,6 +1723,21 @@
 }
 
 
+void X86Assembler::Bind(NearLabel* label) {
+  int bound = buffer_.Size();
+  CHECK(!label->IsBound());  // Labels can only be bound once.
+  while (label->IsLinked()) {
+    int position = label->LinkPosition();
+    uint8_t delta = buffer_.Load<uint8_t>(position);
+    int offset = bound - (position + 1);
+    CHECK(IsInt<8>(offset));
+    buffer_.Store<int8_t>(position, offset);
+    label->position_ = delta != 0u ? label->position_ - delta : 0;
+  }
+  label->BindTo(bound);
+}
+
+
 void X86Assembler::EmitOperand(int reg_or_opcode, const Operand& operand) {
   CHECK_GE(reg_or_opcode, 0);
   CHECK_LT(reg_or_opcode, 8);
@@ -1736,6 +1799,21 @@
 }
 
 
+void X86Assembler::EmitLabelLink(NearLabel* label) {
+  CHECK(!label->IsBound());
+  int position = buffer_.Size();
+  if (label->IsLinked()) {
+    // Save the delta in the byte that we have to play with.
+    uint32_t delta = position - label->LinkPosition();
+    CHECK(IsUint<8>(delta));
+    EmitUint8(delta & 0xFF);
+  } else {
+    EmitUint8(0);
+  }
+  label->LinkTo(position);
+}
+
+
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
                                     const Operand& operand,
                                     const Immediate& imm) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index a9227f3..0c90f28 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -203,6 +203,30 @@
 };
 
 
+// This is equivalent to the Label class, used in a slightly different context. We
+// inherit the functionality of the Label class, but prevent unintended
+// derived-to-base conversions by making the base class private.
+class NearLabel : private Label {
+ public:
+  NearLabel() : Label() {}
+
+  // Expose the Label routines that we need.
+  using Label::Position;
+  using Label::LinkPosition;
+  using Label::IsBound;
+  using Label::IsUnused;
+  using Label::IsLinked;
+
+ private:
+  using Label::BindTo;
+  using Label::LinkTo;
+
+  friend class x86::X86Assembler;
+
+  DISALLOW_COPY_AND_ASSIGN(NearLabel);
+};
+
+
 class X86Assembler FINAL : public Assembler {
  public:
   X86Assembler() {}
@@ -464,10 +488,13 @@
   void hlt();
 
   void j(Condition condition, Label* label);
+  void j(Condition condition, NearLabel* label);
+  void jecxz(NearLabel* label);
 
   void jmp(Register reg);
   void jmp(const Address& address);
   void jmp(Label* label);
+  void jmp(NearLabel* label);
 
   void repne_scasw();
   void repe_cmpsw();
@@ -506,6 +533,7 @@
   int PreferredLoopAlignment() { return 16; }
   void Align(int alignment, int offset);
   void Bind(Label* label);
+  void Bind(NearLabel* label);
 
   //
   // Overridden common assembler high-level functionality
@@ -652,6 +680,7 @@
   void EmitComplex(int rm, const Operand& operand, const Immediate& immediate);
   void EmitLabel(Label* label, int instruction_size);
   void EmitLabelLink(Label* label);
+  void EmitLabelLink(NearLabel* label);
 
   void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
   void EmitGenericShift(int rm, const Operand& operand, Register shifter);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 731b5f4..9ac54af 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -243,4 +243,43 @@
   DriverStr(expected, "bsrl_address");
 }
 
+/////////////////
+// Near labels //
+/////////////////
+
+TEST_F(AssemblerX86Test, Jecxz) {
+  x86::NearLabel target;
+  GetAssembler()->jecxz(&target);
+  GetAssembler()->addl(x86::EDI, x86::Address(x86::ESP, 4));
+  GetAssembler()->Bind(&target);
+  const char* expected =
+    "jecxz 1f\n"
+    "addl 4(%ESP),%EDI\n"
+    "1:\n";
+
+  DriverStr(expected, "jecxz");
+}
+
+TEST_F(AssemblerX86Test, NearLabel) {
+  // Test both forward and backward branches.
+  x86::NearLabel start, target;
+  GetAssembler()->Bind(&start);
+  GetAssembler()->j(x86::kEqual, &target);
+  GetAssembler()->jmp(&target);
+  GetAssembler()->jecxz(&target);
+  GetAssembler()->addl(x86::EDI, x86::Address(x86::ESP, 4));
+  GetAssembler()->Bind(&target);
+  GetAssembler()->j(x86::kNotEqual, &start);
+  GetAssembler()->jmp(&start);
+  const char* expected =
+    "1: je 2f\n"
+    "jmp 2f\n"
+    "jecxz 2f\n"
+    "addl 4(%ESP),%EDI\n"
+    "2: jne 1b\n"
+    "jmp 1b\n";
+
+  DriverStr(expected, "near_label");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index dc61c99..88ea990 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1971,6 +1971,38 @@
 }
 
 
+void X86_64Assembler::j(Condition condition, NearLabel* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    CHECK(IsInt<8>(offset - kShortSize));
+    EmitUint8(0x70 + condition);
+    EmitUint8((offset - kShortSize) & 0xFF);
+  } else {
+    EmitUint8(0x70 + condition);
+    EmitLabelLink(label);
+  }
+}
+
+
+void X86_64Assembler::jrcxz(NearLabel* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    CHECK(IsInt<8>(offset - kShortSize));
+    EmitUint8(0xE3);
+    EmitUint8((offset - kShortSize) & 0xFF);
+  } else {
+    EmitUint8(0xE3);
+    EmitLabelLink(label);
+  }
+}
+
+
 void X86_64Assembler::jmp(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
@@ -2006,6 +2038,22 @@
 }
 
 
+void X86_64Assembler::jmp(NearLabel* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    CHECK(IsInt<8>(offset - kShortSize));
+    EmitUint8(0xEB);
+    EmitUint8((offset - kShortSize) & 0xFF);
+  } else {
+    EmitUint8(0xEB);
+    EmitLabelLink(label);
+  }
+}
+
+
 void X86_64Assembler::rep_movsw() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -2187,6 +2235,21 @@
 }
 
 
+void X86_64Assembler::Bind(NearLabel* label) {
+  int bound = buffer_.Size();
+  CHECK(!label->IsBound());  // Labels can only be bound once.
+  while (label->IsLinked()) {
+    int position = label->LinkPosition();
+    uint8_t delta = buffer_.Load<uint8_t>(position);
+    int offset = bound - (position + 1);
+    CHECK(IsInt<8>(offset));
+    buffer_.Store<int8_t>(position, offset);
+    label->position_ = delta != 0u ? label->position_ - delta : 0;
+  }
+  label->BindTo(bound);
+}
+
+
 void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
   CHECK_GE(reg_or_opcode, 0);
   CHECK_LT(reg_or_opcode, 8);
@@ -2256,6 +2319,21 @@
 }
 
 
+void X86_64Assembler::EmitLabelLink(NearLabel* label) {
+  CHECK(!label->IsBound());
+  int position = buffer_.Size();
+  if (label->IsLinked()) {
+    // Save the delta in the byte that we have to play with.
+    uint32_t delta = position - label->LinkPosition();
+    CHECK(IsUint<8>(delta));
+    EmitUint8(delta & 0xFF);
+  } else {
+    EmitUint8(0);
+  }
+  label->LinkTo(position);
+}
+
+
 void X86_64Assembler::EmitGenericShift(bool wide,
                                        int reg_or_opcode,
                                        CpuRegister reg,
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index da42213..c38aba5 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -302,6 +302,30 @@
 };
 
 
+// This is equivalent to the Label class, used in a slightly different context. We
+// inherit the functionality of the Label class, but prevent unintended
+// derived-to-base conversions by making the base class private.
+class NearLabel : private Label {
+ public:
+  NearLabel() : Label() {}
+
+  // Expose the Label routines that we need.
+  using Label::Position;
+  using Label::LinkPosition;
+  using Label::IsBound;
+  using Label::IsUnused;
+  using Label::IsLinked;
+
+ private:
+  using Label::BindTo;
+  using Label::LinkTo;
+
+  friend class x86_64::X86_64Assembler;
+
+  DISALLOW_COPY_AND_ASSIGN(NearLabel);
+};
+
+
 class X86_64Assembler FINAL : public Assembler {
  public:
   X86_64Assembler() {}
@@ -588,10 +612,13 @@
   void hlt();
 
   void j(Condition condition, Label* label);
+  void j(Condition condition, NearLabel* label);
+  void jrcxz(NearLabel* label);
 
   void jmp(CpuRegister reg);
   void jmp(const Address& address);
   void jmp(Label* label);
+  void jmp(NearLabel* label);
 
   X86_64Assembler* lock();
   void cmpxchgl(const Address& address, CpuRegister reg);
@@ -639,6 +666,7 @@
   int PreferredLoopAlignment() { return 16; }
   void Align(int alignment, int offset);
   void Bind(Label* label);
+  void Bind(NearLabel* label);
 
   //
   // Overridden common assembler high-level functionality
@@ -809,6 +837,7 @@
   void EmitComplex(uint8_t rm, const Operand& operand, const Immediate& immediate);
   void EmitLabel(Label* label, int instruction_size);
   void EmitLabelLink(Label* label);
+  void EmitLabelLink(NearLabel* label);
 
   void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
   void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 8673f03..9e64b47 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1179,6 +1179,47 @@
   DriverStr(expected, "bsrq_address");
 }
 
+/////////////////
+// Near labels //
+/////////////////
+
+TEST_F(AssemblerX86_64Test, Jrcxz) {
+  x86_64::NearLabel target;
+  GetAssembler()->jrcxz(&target);
+  GetAssembler()->addl(x86_64::CpuRegister(x86_64::RDI),
+                       x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
+  GetAssembler()->Bind(&target);
+  const char* expected =
+    "jrcxz 1f\n"
+    "addl 4(%RSP),%EDI\n"
+    "1:\n";
+
+  DriverStr(expected, "jrcxz");
+}
+
+TEST_F(AssemblerX86_64Test, NearLabel) {
+  // Test both forward and backward branches.
+  x86_64::NearLabel start, target;
+  GetAssembler()->Bind(&start);
+  GetAssembler()->j(x86_64::kEqual, &target);
+  GetAssembler()->jmp(&target);
+  GetAssembler()->jrcxz(&target);
+  GetAssembler()->addl(x86_64::CpuRegister(x86_64::RDI),
+                       x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
+  GetAssembler()->Bind(&target);
+  GetAssembler()->j(x86_64::kNotEqual, &start);
+  GetAssembler()->jmp(&start);
+  const char* expected =
+    "1: je 2f\n"
+    "jmp 2f\n"
+    "jrcxz 2f\n"
+    "addl 4(%RSP),%EDI\n"
+    "2: jne 1b\n"
+    "jmp 1b\n";
+
+  DriverStr(expected, "near_label");
+}
+
 std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
                           x86_64::X86_64Assembler* assembler) {
   // From Condition