amd64 front and back ends: track the change of type of Iop_Sqrt32Fx4
and Iop_Sqrt64Fx2 introduced in r3120, in which they acquired a
rounding-mode argument.
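
In IR terms both ops change from V128 -> V128 to (rmode, V128) -> V128,
so the front end now builds a binop whose first argument is a (currently
faked) rounding mode.  A minimal sketch of the shape change, using the
same helpers as the hunks below; `src' stands for any V128 expression:

   /* before r3120: unary, no rounding mode */
   putXMMReg( gregOfRexRM(pfx,rm), unop(Iop_Sqrt32Fx4, src) );

   /* after: binary, rounding mode first (faked via
      get_FAKE_roundingmode for now, see XXXROUNDINGFIXME) */
   putXMMReg( gregOfRexRM(pfx,rm),
              binop(Iop_Sqrt32Fx4, get_FAKE_roundingmode(), src) );

Correspondingly, the back end now matches these ops under Iex_Binop
rather than Iex_Unop, ignoring the rounding-mode argument for the
time being.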


git-svn-id: svn://svn.valgrind.org/vex/trunk@3121 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_amd64_toIR.c b/priv/guest_amd64_toIR.c
index f3fe669..add943d 100644
--- a/priv/guest_amd64_toIR.c
+++ b/priv/guest_amd64_toIR.c
@@ -8850,17 +8850,26 @@
    Int     alen;
    IRTemp  addr;
    UChar   rm = getUChar(delta);
+   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
+   // up in the usual way.
+   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
    if (epartIsReg(rm)) {
-      putXMMReg( gregOfRexRM(pfx,rm), 
-                 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
+      IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
+      /* XXXROUNDINGFIXME */
+      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
+                              : unop(op, src);
+      putXMMReg( gregOfRexRM(pfx,rm), res );
       DIP("%s %s,%s\n", opname,
                         nameXMMReg(eregOfRexRM(pfx,rm)),
                         nameXMMReg(gregOfRexRM(pfx,rm)) );
       return delta+1;
    } else {
       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
-      putXMMReg( gregOfRexRM(pfx,rm), 
-                 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
+      IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
+      /* XXXROUNDINGFIXME */
+      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
+                              : unop(op, src);
+      putXMMReg( gregOfRexRM(pfx,rm), res );
       DIP("%s %s,%s\n", opname,
                         dis_buf,
                         nameXMMReg(gregOfRexRM(pfx,rm)) );
@@ -23046,7 +23055,13 @@
       delta += alen;
       DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
    }
-   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
+   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
+   // up in the usual way.
+   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
+   /* XXXROUNDINGFIXME */
+   IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
+                           : unop(op, mkexpr(arg));
+   putYMMRegLoAndZU( rG, res );
    *uses_vvvv = False;
    return delta;
 }
diff --git a/priv/host_amd64_isel.c b/priv/host_amd64_isel.c
index 81bea0b..3403d54 100644
--- a/priv/host_amd64_isel.c
+++ b/priv/host_amd64_isel.c
@@ -3227,7 +3227,6 @@
 
       case Iop_RecipEst32Fx4: op = Asse_RCPF;   goto do_32Fx4_unary;
       case Iop_RSqrtEst32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
-      case Iop_Sqrt32Fx4:     op = Asse_SQRTF;  goto do_32Fx4_unary;
       do_32Fx4_unary:
       {
          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
@@ -3236,15 +3235,6 @@
          return dst;
       }
 
-      case Iop_Sqrt64Fx2:  op = Asse_SQRTF;  goto do_64Fx2_unary;
-      do_64Fx2_unary:
-      {
-         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
-         HReg dst = newVRegV(env);
-         addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
-         return dst;
-      }
-
       case Iop_RecipEst32F0x4: op = Asse_RCPF;   goto do_32F0x4_unary;
       case Iop_RSqrtEst32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
       case Iop_Sqrt32F0x4:     op = Asse_SQRTF;  goto do_32F0x4_unary;
@@ -3313,6 +3303,19 @@
    if (e->tag == Iex_Binop) {
    switch (e->Iex.Binop.op) {
 
+      case Iop_Sqrt64Fx2:
+      case Iop_Sqrt32Fx4: {
+         /* :: (rmode, vec) -> vec */
+         HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg dst = newVRegV(env);
+         /* XXXROUNDINGFIXME */
+         /* set roundingmode here */
+         addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2 
+                           ? AMD64Instr_Sse64Fx2 : AMD64Instr_Sse32Fx4)
+                       (Asse_SQRTF, arg, dst));
+         return dst;
+      }
+
       /* FIXME: could we generate MOVQ here? */
       case Iop_SetV128lo64: {
          HReg dst  = newVRegV(env);