Add TEST8ri_NOREX pseudo to constrain sub_8bit_hi copies.

In 64-bit mode, sub_8bit_hi sub-registers can only be used by NOREX
instructions. The COPY created from the EXTRACT_SUBREG DAG node cannot
target all GR8 registers, only those in GR8_NOREX.

To enforce this, we ensure that all instructions using the
EXTRACT_SUBREG are GR8_NOREX constrained.

This fixes PR11088.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141499 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 015ebf5..02b0ff2 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2173,9 +2173,10 @@
         SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                         MVT::i8, Reg);
 
-        // Emit a testb. No special NOREX tricks are needed since there's
-        // only one GPR operand!
-        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
+        // target GR8_NOREX registers, so make sure the register class is
+        // forced.
+        return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
                                       Subreg, ShiftedImm);
       }
 
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 6d2c534..f6ed722 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1145,5 +1145,11 @@
                            "{$src, %eax|EAX, $src}">;
   def TEST64i32  : BinOpAI<0xA8, "test", Xi64, RAX,
                            "{$src, %rax|RAX, $src}">;
+
+  // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
+  // register class is constrained to GR8_NOREX.
+  let isPseudo = 1 in
+  def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
+                        "", []>;
 }                          
 
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 86fb3a5..3a02de0 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2421,6 +2421,9 @@
   switch (MI->getOpcode()) {
   case X86::V_SET0:
     return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+  case X86::TEST8ri_NOREX:
+    MI->setDesc(get(X86::TEST8ri));
+    return true;
   }
   return false;
 }
diff --git a/test/CodeGen/X86/norex-subreg.ll b/test/CodeGen/X86/norex-subreg.ll
index 0850fe9..2c529fd 100644
--- a/test/CodeGen/X86/norex-subreg.ll
+++ b/test/CodeGen/X86/norex-subreg.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -O0 < %s
+; RUN: llc < %s
 target triple = "x86_64-apple-macosx10.7"
 
 ; This test case extracts a sub_8bit_hi sub-register:
@@ -37,3 +38,43 @@
   store i16 %10, i16* undef, align 1
   ret void
 }
+
+; This test case extracts a sub_8bit_hi sub-register:
+;
+;       %vreg2<def> = COPY %vreg1:sub_8bit_hi; GR8:%vreg2 GR64_ABCD:%vreg1
+;       TEST8ri %vreg2, 1, %EFLAGS<imp-def>; GR8:%vreg2
+;
+; %vreg2 must be constrained to GR8_NOREX, or the COPY could become impossible.
+;
+; PR11088
+
+define fastcc i32 @g(i64 %FB) nounwind uwtable readnone align 2 {
+entry:
+  %and32 = and i64 %FB, 256
+  %cmp33 = icmp eq i64 %and32, 0
+  %Features.6.or35 = select i1 %cmp33, i32 0, i32 undef
+  %cmp38 = icmp eq i64 undef, 0
+  %or40 = or i32 %Features.6.or35, 4
+  %Features.8 = select i1 %cmp38, i32 %Features.6.or35, i32 %or40
+  %and42 = and i64 %FB, 32
+  %or45 = or i32 %Features.8, 2
+  %cmp43 = icmp eq i64 %and42, 0
+  %Features.8.or45 = select i1 %cmp43, i32 %Features.8, i32 %or45
+  %and47 = and i64 %FB, 8192
+  %cmp48 = icmp eq i64 %and47, 0
+  %or50 = or i32 %Features.8.or45, 32
+  %Features.10 = select i1 %cmp48, i32 %Features.8.or45, i32 %or50
+  %or55 = or i32 %Features.10, 64
+  %Features.10.or55 = select i1 undef, i32 %Features.10, i32 %or55
+  %and57 = lshr i64 %FB, 2
+  %and57.tr = trunc i64 %and57 to i32
+  %or60 = and i32 %and57.tr, 1
+  %Features.12 = or i32 %Features.10.or55, %or60
+  %and62 = and i64 %FB, 128
+  %or65 = or i32 %Features.12, 8
+  %cmp63 = icmp eq i64 %and62, 0
+  %Features.12.or65 = select i1 %cmp63, i32 %Features.12, i32 %or65
+  %Features.14 = select i1 undef, i32 undef, i32 %Features.12.or65
+  %Features.16 = select i1 undef, i32 undef, i32 %Features.14
+  ret i32 %Features.16
+}