Implement FCM{EQ,GE,GT}, FAC{GE,GT} (vector).

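Front end (guest_arm64_toIR.c): decode the three-register vector forms of
FCMEQ/FCMGE/FCMGT and FACGE/FACGT into Iop_Cmp{EQ,LE,LT}{32Fx4,64Fx2},
expressing GE/GT through LE/LT with swapped operands and the FAC forms by
taking Iop_Abs{32Fx4,64Fx2} of both inputs first.  Back end: add the
ARM64vecb_FCM{EQ,GE,GT}{64x2,32x4} instructions to host_arm64_defs.{c,h}
and the emitter, provide the emitter case for ARM64vecu_FABS32x4, and
select them in host_arm64_isel.c, swapping the operands back for the
LE/LT cases.

A minimal lane-wise sketch of the semantics the lowering assumes
(illustrative only; the helper names are hypothetical and the code below
is not part of the sources):

   #include <math.h>
   #include <stdint.h>

   /* One 64-bit lane: all-ones if the compare holds, else zero.
      Comparisons involving NaN are false and therefore give zero. */
   static uint64_t fcmge_lane(double n, double m) {
      return (m <= n) ? ~0ULL : 0ULL;            /* FCMGE: n >= m == m <= n */
   }
   static uint64_t facgt_lane(double n, double m) {
      return (fabs(m) < fabs(n)) ? ~0ULL : 0ULL; /* FACGT: |n| > |m| */
   }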

git-svn-id: svn://svn.valgrind.org/vex/trunk@2842 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 9b895e5..6dde926 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -5751,6 +5751,85 @@
       }
    }
 
+   /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
+   /* 31  28      22   20 15     9 4                  case
+      0q1 01110 0 sz 1 m  111011 n d  FACGE Vd, Vn, Vm
+      0q1 01110 1 sz 1 m  111011 n d  FACGT Vd, Vn, Vm
+      0q0 01110 0 sz 1 m  111001 n d  FCMEQ Vd, Vn, Vm
+      0q1 01110 0 sz 1 m  111001 n d  FCMGE Vd, Vn, Vm
+      0q1 01110 1 sz 1 m  111001 n d  FCMGT Vd, Vn, Vm
+   */
+   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
+       && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
+      Bool isQ   = INSN(30,30) == 1;
+      UInt U     = INSN(29,29);
+      UInt E     = INSN(23,23);
+      Bool isF64 = INSN(22,22) == 1;
+      UInt ac    = INSN(11,11);
+      UInt mm    = INSN(20,16);
+      UInt nn    = INSN(9,5);
+      UInt dd    = INSN(4,0);
+      /* Combine E, U and ac into a single selector for the operation. */
+      UInt   EUac   = (E << 2) | (U << 1) | ac;
+      IROp   opABS  = Iop_INVALID;
+      IROp   opCMP  = Iop_INVALID;
+      IRType laneTy = Ity_INVALID;
+      Bool   zeroHI = False;
+      Bool   swap   = True;
+      const HChar* arr = "??";
+      const HChar* nm  = "??";
+      Bool ok
+         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
+      if (ok) {
+         vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
+         switch (EUac) {
+            case BITS3(0,0,0):
+               nm    = "fcmeq";
+               opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
+               swap  = False;
+               break;
+            case BITS3(0,1,0):
+               nm    = "fcmge";
+               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
+               break;
+            case BITS3(0,1,1):
+               nm    = "facge";
+               opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
+               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
+               break;
+            case BITS3(1,1,0):
+               nm    = "fcmgt";
+               opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
+               break;
+            case BITS3(1,1,1):
+               nm    = "facgt";
+               opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
+               opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
+               break;
+            default:
+               break;
+         }
+      }
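+      /* GE and GT are realised via the LE/LT IROps with the operands
+         swapped (swap == True); the FAC variants additionally take the
+         absolute value of both inputs before comparing. */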
+      if (opCMP != Iop_INVALID) {
+         IRExpr* argN = getQReg128(nn);
+         IRExpr* argM = getQReg128(mm);
+         if (opABS != Iop_INVALID) {
+            argN = unop(opABS, argN);
+            argM = unop(opABS, argM);
+         }
+         IRExpr* res = swap ? binop(opCMP, argM, argN)
+                            : binop(opCMP, argN, argM);
+         if (zeroHI) {
+            res = unop(Iop_ZeroHI64ofV128, res);
+         }
+         putQReg128(dd, res);
+         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+         return True;
+      }
+      /* else fall through */
+   }
+
    /* -------------------- FCVTN -------------------- */
    /* 31  28    23  20    15     9 4
       0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index a26f577..dd37053 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -885,6 +885,12 @@
       case ARM64vecb_CMEQ32x4: *nm = "cmeq"; *ar = "4s";  return;
       case ARM64vecb_CMEQ16x8: *nm = "cmeq"; *ar = "8h";  return;
       case ARM64vecb_CMEQ8x16: *nm = "cmeq"; *ar = "16b"; return;
+      case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return;
+      case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return;
+      case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return;
+      case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return;
+      case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return;
+      case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return;
       default: vpanic("showARM64VecBinOp");
    }
 }
@@ -4955,6 +4961,15 @@
 
             011 01110 11 1 m  001101 n d   CMHI Vd.2d, Vn.2d, Vm.2d  >u, ATC
             010 01110 11 1 m  001101 n d   CMGT Vd.2d, Vn.2d, Vm.2d  >s, ATC
+
+            010 01110 01 1 m  111001 n d   FCMEQ Vd.2d, Vn.2d, Vm.2d
+            010 01110 00 1 m  111001 n d   FCMEQ Vd.4s, Vn.4s, Vm.4s
+
+            011 01110 01 1 m  111001 n d   FCMGE Vd.2d, Vn.2d, Vm.2d
+            011 01110 00 1 m  111001 n d   FCMGE Vd.4s, Vn.4s, Vm.4s
+
+            011 01110 11 1 m  111001 n d   FCMGT Vd.2d, Vn.2d, Vm.2d
+            011 01110 10 1 m  111001 n d   FCMGT Vd.4s, Vn.4s, Vm.4s
          */
          UInt vD = qregNo(i->ARM64in.VBinV.dst);
          UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5072,6 +5087,26 @@
                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
                break;
 
+            case ARM64vecb_FCMEQ64x2:
+               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
+               break;
+            case ARM64vecb_FCMEQ32x4:
+               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
+               break;
+
+            case ARM64vecb_FCMGE64x2:
+               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
+               break;
+            case ARM64vecb_FCMGE32x4:
+               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
+               break;
+
+            case ARM64vecb_FCMGT64x2:
+               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
+               break;
+            case ARM64vecb_FCMGT32x4:
+               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
+               break;
             default:
                goto bad;
          }
@@ -5091,6 +5126,9 @@
             case ARM64vecu_FABS64x2:
                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
                break;
+            case ARM64vecu_FABS32x4:
+               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
+               break;
             case ARM64vecu_FNEG64x2:
                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
                break;
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index ede7e55..bee6d2c 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -342,6 +342,12 @@
       ARM64vecb_CMEQ32x4,
       ARM64vecb_CMEQ16x8,
       ARM64vecb_CMEQ8x16,
+      ARM64vecb_FCMEQ64x2,
+      ARM64vecb_FCMEQ32x4,
+      ARM64vecb_FCMGE64x2,
+      ARM64vecb_FCMGE32x4,
+      ARM64vecb_FCMGT64x2,
+      ARM64vecb_FCMGT32x4,
       ARM64vecb_INVALID
    }
    ARM64VecBinOp;
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index d50a6db..712d161 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -4401,6 +4401,7 @@
       switch (e->Iex.Unop.op) {
          case Iop_NotV128:
          case Iop_Abs64Fx2:
+         case Iop_Abs32Fx4:
          case Iop_Neg64Fx2: {
             HReg res = newVRegV(env);
             HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
@@ -4408,6 +4409,7 @@
             switch (e->Iex.Unop.op) {
                case Iop_NotV128:  op = ARM64vecu_NOT;      break;
                case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
+               case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
                case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
                default: vassert(0);
             }
@@ -4921,39 +4923,57 @@
          case Iop_Sub16x8:
          case Iop_Mul32x4:
          case Iop_Mul16x8:
-         case Iop_CmpEQ64x2: {
+         case Iop_CmpEQ64x2:
+         case Iop_CmpEQ64Fx2:
+         case Iop_CmpEQ32Fx4:
+         case Iop_CmpLE64Fx2:
+         case Iop_CmpLE32Fx4:
+         case Iop_CmpLT64Fx2:
+         case Iop_CmpLT32Fx4:
+         {
             HReg res  = newVRegV(env);
             HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
             HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
+            Bool sw   = False;
             ARM64VecBinOp op = ARM64vecb_INVALID;
             switch (e->Iex.Binop.op) {
-               case Iop_AndV128:   op = ARM64vecb_AND; break;
-               case Iop_OrV128:    op = ARM64vecb_ORR; break;
-               case Iop_XorV128:   op = ARM64vecb_XOR; break;
-               case Iop_Max32Ux4:  op = ARM64vecb_UMAX32x4; break;
-               case Iop_Max16Ux8:  op = ARM64vecb_UMAX16x8; break;
-               case Iop_Max8Ux16:  op = ARM64vecb_UMAX8x16; break;
-               case Iop_Min32Ux4:  op = ARM64vecb_UMIN32x4; break;
-               case Iop_Min16Ux8:  op = ARM64vecb_UMIN16x8; break;
-               case Iop_Min8Ux16:  op = ARM64vecb_UMIN8x16; break;
-               case Iop_Max32Sx4:  op = ARM64vecb_SMAX32x4; break;
-               case Iop_Max16Sx8:  op = ARM64vecb_SMAX16x8; break;
-               case Iop_Max8Sx16:  op = ARM64vecb_SMAX8x16; break;
-               case Iop_Min32Sx4:  op = ARM64vecb_SMIN32x4; break;
-               case Iop_Min16Sx8:  op = ARM64vecb_SMIN16x8; break;
-               case Iop_Min8Sx16:  op = ARM64vecb_SMIN8x16; break;
-               case Iop_Add64x2:   op = ARM64vecb_ADD64x2; break;
-               case Iop_Add32x4:   op = ARM64vecb_ADD32x4; break;
-               case Iop_Add16x8:   op = ARM64vecb_ADD16x8; break;
-               case Iop_Sub64x2:   op = ARM64vecb_SUB64x2; break;
-               case Iop_Sub32x4:   op = ARM64vecb_SUB32x4; break;
-               case Iop_Sub16x8:   op = ARM64vecb_SUB16x8; break;
-               case Iop_Mul32x4:   op = ARM64vecb_MUL32x4; break;
-               case Iop_Mul16x8:   op = ARM64vecb_MUL16x8; break;
-               case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
+               case Iop_AndV128:    op = ARM64vecb_AND; break;
+               case Iop_OrV128:     op = ARM64vecb_ORR; break;
+               case Iop_XorV128:    op = ARM64vecb_XOR; break;
+               case Iop_Max32Ux4:   op = ARM64vecb_UMAX32x4; break;
+               case Iop_Max16Ux8:   op = ARM64vecb_UMAX16x8; break;
+               case Iop_Max8Ux16:   op = ARM64vecb_UMAX8x16; break;
+               case Iop_Min32Ux4:   op = ARM64vecb_UMIN32x4; break;
+               case Iop_Min16Ux8:   op = ARM64vecb_UMIN16x8; break;
+               case Iop_Min8Ux16:   op = ARM64vecb_UMIN8x16; break;
+               case Iop_Max32Sx4:   op = ARM64vecb_SMAX32x4; break;
+               case Iop_Max16Sx8:   op = ARM64vecb_SMAX16x8; break;
+               case Iop_Max8Sx16:   op = ARM64vecb_SMAX8x16; break;
+               case Iop_Min32Sx4:   op = ARM64vecb_SMIN32x4; break;
+               case Iop_Min16Sx8:   op = ARM64vecb_SMIN16x8; break;
+               case Iop_Min8Sx16:   op = ARM64vecb_SMIN8x16; break;
+               case Iop_Add64x2:    op = ARM64vecb_ADD64x2; break;
+               case Iop_Add32x4:    op = ARM64vecb_ADD32x4; break;
+               case Iop_Add16x8:    op = ARM64vecb_ADD16x8; break;
+               case Iop_Sub64x2:    op = ARM64vecb_SUB64x2; break;
+               case Iop_Sub32x4:    op = ARM64vecb_SUB32x4; break;
+               case Iop_Sub16x8:    op = ARM64vecb_SUB16x8; break;
+               case Iop_Mul32x4:    op = ARM64vecb_MUL32x4; break;
+               case Iop_Mul16x8:    op = ARM64vecb_MUL16x8; break;
+               case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2; break;
+               case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
+               case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
+               case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
+               case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
+               case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
+               case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
                default: vassert(0);
             }
-            addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
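+            /* Iop_CmpLE/CmpLT give argL <= argR / argL < argR, but FCMGE and
+               FCMGT compute >= and >, so those cases set sw and are emitted
+               with the operands swapped. */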
+            if (sw) {
+               addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
+            } else {
+               addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
+            }
             return res;
          }
 //ZZ          case Iop_Add32Fx4: {