arm64: implement:
FRECPS d_d_d, s_s_s
FRSQRTS d_d_d, s_s_s
FRECPE d_d, s_s
FRSQRTE d_d, s_s
FRECPX d_d, s_s
FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s
FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s
FRECPE 2d_2d, 4s_4s, 2s_2s
FRSQRTE 2d_2d, 4s_4s, 2s_2s
git-svn-id: svn://svn.valgrind.org/vex/trunk@3092 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 8843182..c40d44b 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -9700,6 +9700,23 @@
return True;
}
+ if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
+ /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
+ Bool isSQRT = (size & 2) == 2;
+ Bool isD = (size & 1) == 1;
+ IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
+ : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
+ IRTemp res = newTempV128();
+ assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(res))));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
+ c, dd, c, nn, c, mm);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -9900,7 +9917,37 @@
return True;
}
-# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
+ /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
+ /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
+ Bool isSQRT = bitU == 1;
+ Bool isD = (size & 1) == 1;
+ IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
+ : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
+ IRTemp resV = newTempV128();
+ assign(resV, unop(op, getQReg128(nn)));
+ putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
+ mkexpr(resV))));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
+ return True;
+ }
+
+ if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
+ Bool isD = (size & 1) == 1;
+ IRType ty = isD ? Ity_F64 : Ity_F32;
+ IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
+ IRTemp res = newTemp(ty);
+ IRTemp rm = mk_get_IR_rounding_mode();
+ assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
+ putQReg128(dd, mkV128(0x0000));
+ putQRegLane(dd, 0, mkexpr(res));
+ HChar c = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -11449,6 +11496,23 @@
return True;
}
+ if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
+ Bool isSQRT = (size & 2) == 2;
+ Bool isD = (size & 1) == 1;
+ if (bitQ == 0 && isD) return False; // implied 1d case
+ IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
+ : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
+ IRTemp res = newTempV128();
+ assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
+ DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -11857,7 +11921,6 @@
return True;
}
-
ix = 0;
if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
@@ -11928,8 +11991,6 @@
return True;
}
-
-
if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
/* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
/* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
@@ -11983,6 +12044,23 @@
/* else fall through */
}
+ if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
+ /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
+ /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
+ Bool isSQRT = bitU == 1;
+ Bool isD = (size & 1) == 1;
+ IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
+ : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
+ if (bitQ == 0 && isD) return False; // implied 1d case
+ IRTemp resV = newTempV128();
+ assign(resV, unop(op, getQReg128(nn)));
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
+ const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
+ DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
return False;
# undef INSN
}
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index 8b5114e..7cc0910 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -559,10 +559,11 @@
static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
switch (op) {
- case ARM64fpu_NEG: return "neg ";
- case ARM64fpu_ABS: return "abs ";
- case ARM64fpu_SQRT: return "sqrt ";
- case ARM64fpu_RINT: return "rinti";
+ case ARM64fpu_NEG: return "neg ";
+ case ARM64fpu_ABS: return "abs ";
+ case ARM64fpu_SQRT: return "sqrt ";
+ case ARM64fpu_RINT: return "rinti";
+ case ARM64fpu_RECPX: return "recpx";
default: vpanic("showARM64FpUnaryOp");
}
}
@@ -687,22 +688,26 @@
case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
- case ARM64vecb_SSHL64x2: *nm = "sshl"; *ar = "2d"; return;
- case ARM64vecb_SSHL32x4: *nm = "sshl"; *ar = "4s"; return;
- case ARM64vecb_SSHL16x8: *nm = "sshl"; *ar = "8h"; return;
- case ARM64vecb_SSHL8x16: *nm = "sshl"; *ar = "16b"; return;
- case ARM64vecb_USHL64x2: *nm = "ushl"; *ar = "2d"; return;
- case ARM64vecb_USHL32x4: *nm = "ushl"; *ar = "4s"; return;
- case ARM64vecb_USHL16x8: *nm = "ushl"; *ar = "8h"; return;
- case ARM64vecb_USHL8x16: *nm = "ushl"; *ar = "16b"; return;
- case ARM64vecb_SRSHL64x2: *nm = "srshl"; *ar = "2d"; return;
- case ARM64vecb_SRSHL32x4: *nm = "srshl"; *ar = "4s"; return;
- case ARM64vecb_SRSHL16x8: *nm = "srshl"; *ar = "8h"; return;
- case ARM64vecb_SRSHL8x16: *nm = "srshl"; *ar = "16b"; return;
- case ARM64vecb_URSHL64x2: *nm = "urshl"; *ar = "2d"; return;
- case ARM64vecb_URSHL32x4: *nm = "urshl"; *ar = "4s"; return;
- case ARM64vecb_URSHL16x8: *nm = "urshl"; *ar = "8h"; return;
- case ARM64vecb_URSHL8x16: *nm = "urshl"; *ar = "16b"; return;
+ case ARM64vecb_SSHL64x2: *nm = "sshl "; *ar = "2d"; return;
+ case ARM64vecb_SSHL32x4: *nm = "sshl "; *ar = "4s"; return;
+ case ARM64vecb_SSHL16x8: *nm = "sshl "; *ar = "8h"; return;
+ case ARM64vecb_SSHL8x16: *nm = "sshl "; *ar = "16b"; return;
+ case ARM64vecb_USHL64x2: *nm = "ushl "; *ar = "2d"; return;
+ case ARM64vecb_USHL32x4: *nm = "ushl "; *ar = "4s"; return;
+ case ARM64vecb_USHL16x8: *nm = "ushl "; *ar = "8h"; return;
+ case ARM64vecb_USHL8x16: *nm = "ushl "; *ar = "16b"; return;
+ case ARM64vecb_SRSHL64x2: *nm = "srshl "; *ar = "2d"; return;
+ case ARM64vecb_SRSHL32x4: *nm = "srshl "; *ar = "4s"; return;
+ case ARM64vecb_SRSHL16x8: *nm = "srshl "; *ar = "8h"; return;
+ case ARM64vecb_SRSHL8x16: *nm = "srshl "; *ar = "16b"; return;
+ case ARM64vecb_URSHL64x2: *nm = "urshl "; *ar = "2d"; return;
+ case ARM64vecb_URSHL32x4: *nm = "urshl "; *ar = "4s"; return;
+ case ARM64vecb_URSHL16x8: *nm = "urshl "; *ar = "8h"; return;
+ case ARM64vecb_URSHL8x16: *nm = "urshl "; *ar = "16b"; return;
+ case ARM64vecb_FRECPS64x2: *nm = "frecps"; *ar = "2d"; return;
+ case ARM64vecb_FRECPS32x4: *nm = "frecps"; *ar = "4s"; return;
+ case ARM64vecb_FRSQRTS64x2: *nm = "frsqrts"; *ar = "2d"; return;
+ case ARM64vecb_FRSQRTS32x4: *nm = "frsqrts"; *ar = "4s"; return;
default: vpanic("showARM64VecBinOp");
}
}
@@ -752,6 +757,10 @@
case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
+ case ARM64vecu_FRECPE64x2: *nm = "frecpe"; *ar = "2d"; return;
+ case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return;
+ case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return;
+ case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return;
default: vpanic("showARM64VecUnaryOp");
}
}
@@ -2601,6 +2610,7 @@
#define X110010 BITS8(0,0, 1,1,0,0,1,0)
#define X110100 BITS8(0,0, 1,1,0,1,0,0)
#define X110101 BITS8(0,0, 1,1,0,1,0,1)
+#define X110110 BITS8(0,0, 1,1,0,1,1,0)
#define X110111 BITS8(0,0, 1,1,0,1,1,1)
#define X111000 BITS8(0,0, 1,1,1,0,0,0)
#define X111001 BITS8(0,0, 1,1,1,0,0,1)
@@ -2642,6 +2652,8 @@
#define X11011110 BITS8(1,1,0,1,1,1,1,0)
#define X11110001 BITS8(1,1,1,1,0,0,0,1)
#define X11110011 BITS8(1,1,1,1,0,0,1,1)
+#define X11110101 BITS8(1,1,1,1,0,1,0,1)
+#define X11110111 BITS8(1,1,1,1,0,1,1,1)
/* --- 4 fields --- */
@@ -3878,7 +3890,7 @@
000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
------------------- 0,1 --------- FABS ------
------------------- 1,0 --------- FNEG ------
- ------------------- 1,1 --------- FQSRT -----
+ ------------------- 1,1 --------- FSQRT -----
*/
UInt dD = dregNo(i->ARM64in.VUnaryD.dst);
UInt dN = dregNo(i->ARM64in.VUnaryD.src);
@@ -3902,6 +3914,13 @@
*p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
goto done;
}
+ /*
+ 010, 11110 11 1,0000 1,1111 10 n d FRECPX Dd, Dn
+ */
+ if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
+ *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
+ goto done;
+ }
goto bad;
}
case ARM64in_VUnaryS: {
@@ -3909,7 +3928,7 @@
000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
------------------- 0,1 --------- FABS ------
------------------- 1,0 --------- FNEG ------
- ------------------- 1,1 --------- FQSRT -----
+ ------------------- 1,1 --------- FSQRT -----
*/
UInt sD = dregNo(i->ARM64in.VUnaryS.dst);
UInt sN = dregNo(i->ARM64in.VUnaryS.src);
@@ -3933,6 +3952,13 @@
*p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
goto done;
}
+ /*
+ 010, 11110 10 1,0000 1,1111 10 n d FRECPX Sd, Sn
+ */
+ if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
+ *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
+ goto done;
+ }
goto bad;
}
case ARM64in_VBinD: {
@@ -4176,6 +4202,11 @@
010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
+
+ 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
+ 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
+ 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
+ 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
*/
UInt vD = qregNo(i->ARM64in.VBinV.dst);
UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -4616,6 +4647,19 @@
*p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
break;
+ case ARM64vecb_FRECPS64x2:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
+ break;
+ case ARM64vecb_FRECPS32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
+ break;
+ case ARM64vecb_FRSQRTS64x2:
+ *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
+ break;
+ case ARM64vecb_FRSQRTS32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
+ break;
+
default:
goto bad;
}
@@ -4692,6 +4736,12 @@
010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
+
+ 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
+ 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
+
+ 011 01110 11 1 00001 110110 n d FRSQRTE Vd.2d, Vn.2d
+ 011 01110 10 1 00001 110110 n d FRSQRTE Vd.4s, Vn.4s
*/
UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
@@ -4771,6 +4821,18 @@
case ARM64vecu_URSQRTE32x4:
*p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
break;
+ case ARM64vecu_FRECPE64x2:
+ *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
+ break;
+ case ARM64vecu_FRECPE32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
+ break;
+ case ARM64vecu_FRSQRTE64x2:
+ *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
+ break;
+ case ARM64vecu_FRSQRTE32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
+ break;
default:
goto bad;
}
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index b25cf20..281cb0e 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -301,6 +301,7 @@
ARM64fpu_ABS,
ARM64fpu_SQRT,
ARM64fpu_RINT,
+ ARM64fpu_RECPX,
ARM64fpu_INVALID
}
ARM64FpUnaryOp;
@@ -383,6 +384,8 @@
ARM64vecb_SRSHL16x8, ARM64vecb_SRSHL8x16,
ARM64vecb_URSHL64x2, ARM64vecb_URSHL32x4,
ARM64vecb_URSHL16x8, ARM64vecb_URSHL8x16,
+ ARM64vecb_FRECPS64x2, ARM64vecb_FRECPS32x4,
+ ARM64vecb_FRSQRTS64x2, ARM64vecb_FRSQRTS32x4,
ARM64vecb_INVALID
}
ARM64VecBinOp;
@@ -413,6 +416,8 @@
ARM64vecu_REV6416B, ARM64vecu_REV648H, ARM64vecu_REV644S,
ARM64vecu_URECPE32x4,
ARM64vecu_URSQRTE32x4,
+ ARM64vecu_FRECPE64x2, ARM64vecu_FRECPE32x4,
+ ARM64vecu_FRSQRTE64x2, ARM64vecu_FRSQRTE32x4,
ARM64vecu_INVALID
}
ARM64VecUnaryOp;
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index 9baed6b..afde38f 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -366,7 +366,7 @@
/* Set the FP rounding mode: 'mode' is an I32-typed expression
denoting a value in the range 0 .. 3, indicating a round mode
encoded as per type IRRoundingMode -- the first four values only
- (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the PPC
+ (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
FSCR to have the same rounding.
For speed & simplicity, we're setting the *entire* FPCR here.
@@ -2244,9 +2244,12 @@
case Iop_Reverse32sIn64_x2:
case Iop_RecipEst32Ux4:
case Iop_RSqrtEst32Ux4:
+ case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
+ case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
{
- HReg res = newVRegV(env);
- HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
+ HReg res = newVRegV(env);
+ HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
+ Bool setRM = False;
ARM64VecUnaryOp op = ARM64vecu_INVALID;
switch (e->Iex.Unop.op) {
case Iop_NotV128: op = ARM64vecu_NOT; break;
@@ -2274,8 +2277,23 @@
case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S; break;
case Iop_RecipEst32Ux4: op = ARM64vecu_URECPE32x4; break;
case Iop_RSqrtEst32Ux4: op = ARM64vecu_URSQRTE32x4; break;
+ case Iop_RecipEst64Fx2: setRM = True;
+ op = ARM64vecu_FRECPE64x2; break;
+ case Iop_RecipEst32Fx4: setRM = True;
+ op = ARM64vecu_FRECPE32x4; break;
+ case Iop_RSqrtEst64Fx2: setRM = True;
+ op = ARM64vecu_FRSQRTE64x2; break;
+ case Iop_RSqrtEst32Fx4: setRM = True;
+ op = ARM64vecu_FRSQRTE32x4; break;
default: vassert(0);
}
+ if (setRM) {
+ // This is a bit of a kludge. We should do rm properly for
+ // these recip-est insns, but that would require changing the
+ // primop's type to take an rmode.
+ set_FPCR_rounding_mode(env, IRExpr_Const(
+ IRConst_U32(Irrm_NEAREST)));
+ }
addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
return res;
}
@@ -2407,11 +2425,14 @@
case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
case Iop_Max64Fx2: case Iop_Max32Fx4:
case Iop_Min64Fx2: case Iop_Min32Fx4:
+ case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
+ case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
{
- HReg res = newVRegV(env);
- HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
- HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
- Bool sw = False;
+ HReg res = newVRegV(env);
+ HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
+ Bool sw = False;
+ Bool setRM = False;
ARM64VecBinOp op = ARM64vecb_INVALID;
switch (e->Iex.Binop.op) {
case Iop_AndV128: op = ARM64vecb_AND; break;
@@ -2528,8 +2549,23 @@
case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4; break;
case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2; break;
case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4; break;
+ case Iop_RecipStep64Fx2: setRM = True;
+ op = ARM64vecb_FRECPS64x2; break;
+ case Iop_RecipStep32Fx4: setRM = True;
+ op = ARM64vecb_FRECPS32x4; break;
+ case Iop_RSqrtStep64Fx2: setRM = True;
+ op = ARM64vecb_FRSQRTS64x2; break;
+ case Iop_RSqrtStep32Fx4: setRM = True;
+ op = ARM64vecb_FRSQRTS32x4; break;
default: vassert(0);
}
+ if (setRM) {
+ // This is a bit of a kludge. We should do rm properly for
+ // these recip-step insns, but that would require changing the
+ // primop's type to take an rmode.
+ set_FPCR_rounding_mode(env, IRExpr_Const(
+ IRConst_U32(Irrm_NEAREST)));
+ }
if (sw) {
addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
} else {
@@ -3034,18 +3070,20 @@
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
- case Iop_RoundF64toInt: {
+ case Iop_RoundF64toInt:
+ case Iop_SqrtF64:
+ case Iop_RecpExpF64: {
HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegD(env);
set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
- addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
- return dst;
- }
- case Iop_SqrtF64: {
- HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
- HReg dst = newVRegD(env);
- set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
- addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
+ ARM64FpUnaryOp op = ARM64fpu_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_RoundF64toInt: op = ARM64fpu_RINT; break;
+ case Iop_SqrtF64: op = ARM64fpu_SQRT; break;
+ case Iop_RecpExpF64: op = ARM64fpu_RECPX; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
return dst;
}
case Iop_I64StoF64:
@@ -3195,18 +3233,20 @@
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
- case Iop_RoundF32toInt: {
+ case Iop_RoundF32toInt:
+ case Iop_SqrtF32:
+ case Iop_RecpExpF32: {
HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegD(env);
set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
- addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
- return dst;
- }
- case Iop_SqrtF32: {
- HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
- HReg dst = newVRegD(env);
- set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
- addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
+ ARM64FpUnaryOp op = ARM64fpu_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_RoundF32toInt: op = ARM64fpu_RINT; break;
+ case Iop_SqrtF32: op = ARM64fpu_SQRT; break;
+ case Iop_RecpExpF32: op = ARM64fpu_RECPX; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
return dst;
}
case Iop_F64toF32: {
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index 64e487d..c56095c 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -337,6 +337,9 @@
case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
+ case Iop_RecpExpF64: vex_printf("RecpExpF64"); return;
+ case Iop_RecpExpF32: vex_printf("RecpExpF32"); return;
+
case Iop_QAdd32S: vex_printf("QAdd32S"); return;
case Iop_QSub32S: vex_printf("QSub32S"); return;
case Iop_Add16x2: vex_printf("Add16x2"); return;
@@ -637,10 +640,15 @@
case Iop_RecipEst32F0x4: vex_printf("RecipEst32F0x4"); return;
case Iop_RecipStep32Fx2: vex_printf("RecipStep32Fx2"); return;
case Iop_RecipStep32Fx4: vex_printf("RecipStep32Fx4"); return;
+ case Iop_RecipEst64Fx2: vex_printf("RecipEst64Fx2"); return;
+ case Iop_RecipStep64Fx2: vex_printf("RecipStep64Fx2"); return;
+
case Iop_Abs32Fx4: vex_printf("Abs32Fx4"); return;
case Iop_Abs64Fx2: vex_printf("Abs64Fx2"); return;
case Iop_RSqrtStep32Fx4: vex_printf("RSqrtStep32Fx4"); return;
+ case Iop_RSqrtStep64Fx2: vex_printf("RSqrtStep64Fx2"); return;
case Iop_RSqrtStep32Fx2: vex_printf("RSqrtStep32Fx2"); return;
+ case Iop_RSqrtEst64Fx2: vex_printf("RSqrtEst64Fx2"); return;
case Iop_RSqrtEst32F0x4: vex_printf("RSqrtEst32F0x4"); return;
case Iop_RSqrtEst32Fx8: vex_printf("RSqrtEst32Fx8"); return;
@@ -2753,10 +2761,12 @@
UNARY(Ity_F32, Ity_F32);
case Iop_SqrtF64:
+ case Iop_RecpExpF64:
BINARY(ity_RMode,Ity_F64, Ity_F64);
case Iop_SqrtF32:
case Iop_RoundF32toInt:
+ case Iop_RecpExpF32:
BINARY(ity_RMode,Ity_F32, Ity_F32);
case Iop_CmpF32:
@@ -2971,8 +2981,8 @@
case Iop_InterleaveOddLanes16x8: case Iop_InterleaveEvenLanes16x8:
case Iop_InterleaveOddLanes32x4: case Iop_InterleaveEvenLanes32x4:
case Iop_Perm8x16: case Iop_Perm32x4:
- case Iop_RecipStep32Fx4:
- case Iop_RSqrtStep32Fx4:
+ case Iop_RecipStep32Fx4: case Iop_RecipStep64Fx2:
+ case Iop_RSqrtStep32Fx4: case Iop_RSqrtStep64Fx2:
case Iop_CipherV128:
case Iop_CipherLV128:
case Iop_NCipherV128:
@@ -2995,6 +3005,7 @@
case Iop_NotV128:
case Iop_RecipEst32Fx4: case Iop_RecipEst32F0x4:
+ case Iop_RecipEst64Fx2: case Iop_RSqrtEst64Fx2:
case Iop_RecipEst32Ux4:
case Iop_RSqrtEst32F0x4:
case Iop_Sqrt32Fx4: case Iop_Sqrt32F0x4:
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index bd97f87..3d2c2b2 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -754,6 +754,11 @@
/* NB: pretty much the same as Iop_F64toF32, except no change
of type. */
+ /* --- guest arm64 specifics, not mandated by 754. --- */
+
+ Iop_RecpExpF64, /* FRECPX d :: IRRoundingMode(I32) x F64 -> F64 */
+ Iop_RecpExpF32, /* FRECPX s :: IRRoundingMode(I32) x F32 -> F32 */
+
/* ------------------ 32-bit SIMD Integer ------------------ */
/* 32x1 saturating add/sub (ok, well, not really SIMD :) */
@@ -1284,8 +1289,8 @@
Iop_Neg32Fx4,
/* Vector Reciprocal Estimate finds an approximate reciprocal of each
- element in the operand vector, and places the results in the destination
- vector. */
+ element in the operand vector, and places the results in the
+ destination vector. */
Iop_RecipEst32Fx4,
/* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
@@ -1348,6 +1353,12 @@
Iop_Sqrt64Fx2,
Iop_Neg64Fx2,
+ /* see 32Fx4 variants for description */
+ Iop_RecipEst64Fx2, // unary
+ Iop_RecipStep64Fx2, // binary
+ Iop_RSqrtEst64Fx2, // unary
+ Iop_RSqrtStep64Fx2, // binary
+
/* --- 64x2 lowest-lane-only scalar FP --- */
/* In binary cases, upper half is copied from first operand. In