arm64: implement remaining SQDMULH and SQRDMULH cases.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2911 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 4132780..44decc5 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -6292,7 +6292,7 @@
}
-static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( IRExpr* srcE, UInt size )
+static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
vassert(size < 4);
IRTemp t = newTempV128();
@@ -6412,6 +6412,9 @@
}
+/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
+ double that, possibly add a rounding constant (R variants), and take
+ the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
/*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
@@ -6469,7 +6472,8 @@
if (opZHI == Iop_INVALID) {
assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
} else {
- vassert(opZHI == Iop_ZeroHI64ofV128 || opZHI == Iop_ZeroHI96ofV128);
+ vassert(opZHI == Iop_ZeroHI64ofV128
+ || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
}
assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
@@ -7491,9 +7495,9 @@
assign(argL, getQReg128(nn));
assign(argR, getQReg128(mm));
assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
- binop(qop, mkexpr(argL), mkexpr(argR)), size)));
+ size, binop(qop, mkexpr(argL), mkexpr(argR)))));
assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
- binop(nop, mkexpr(argL), mkexpr(argR)), size)));
+ size, binop(nop, mkexpr(argL), mkexpr(argR)))));
putQReg128(dd, mkexpr(qres));
updateQCFLAGwithDifference(qres, nres);
const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
@@ -7573,6 +7577,28 @@
return True;
}
+ if (opcode == BITS5(1,0,1,1,0)) {
+ /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
+ /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
+ if (size == X00 || size == X11) return False;
+ Bool isR = bitU == 1;
+ IRTemp res, sat1q, sat1n, vN, vM;
+ res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
+ newTempsV128_2(&vN, &vM);
+ assign(vN, getQReg128(nn));
+ assign(vM, getQReg128(mm));
+ math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
+ putQReg128(dd,
+ mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
+ updateQCFLAGwithDifference(
+ math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
+ math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
+ const HChar arr = "bhsd"[size];
+ const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
+ DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
+ return True;
+ }
+
if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
/* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
IRType ity = size == X11 ? Ity_F64 : Ity_F32;
@@ -7621,8 +7647,8 @@
IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
(isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
getQReg128(nn), size );
- IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(mkexpr(qresFW), size);
- IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(mkexpr(nresFW), size);
+ IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
+ IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
putQReg128(dd, mkexpr(qres));
updateQCFLAGwithDifference(qres, nres);
const HChar arr = "bhsd"[size];
@@ -7771,6 +7797,40 @@
return True;
}
+ if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
+ /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
+ /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
+ UInt mm = 32; // invalid
+ UInt ix = 16; // invalid
+ switch (size) {
+ case X00:
+ return False; // b case is not allowed
+ case X01:
+ mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
+ case X10:
+ mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
+ case X11:
+ return False; // q case is not allowed
+ default:
+ vassert(0);
+ }
+ vassert(mm < 32 && ix < 16);
+ Bool isR = opcode == BITS4(1,1,0,1);
+ IRTemp res, sat1q, sat1n, vN, vM;
+ res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
+ vN = newTempV128();
+ assign(vN, getQReg128(nn));
+ vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
+ math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
+ IROp opZHI = mkVecZEROHIxxOFV128(size);
+ putQReg128(dd, unop(opZHI, mkexpr(res)));
+ updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
+ const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
+ HChar ch = size == X01 ? 'h' : 's';
+      DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -9400,6 +9460,42 @@
return True;
}
+ if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
+ /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
+ /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
+ UInt mm = 32; // invalid
+ UInt ix = 16; // invalid
+ switch (size) {
+ case X00:
+ return False; // b case is not allowed
+ case X01:
+ mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
+ case X10:
+ mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
+ case X11:
+ return False; // q case is not allowed
+ default:
+ vassert(0);
+ }
+ vassert(mm < 32 && ix < 16);
+ Bool isR = opcode == BITS4(1,1,0,1);
+ IRTemp res, sat1q, sat1n, vN, vM;
+ res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
+ vN = newTempV128();
+ assign(vN, getQReg128(nn));
+ vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
+ math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+ IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
+ updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
+ const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ HChar ch = size == X01 ? 'h' : 's';
+      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
+ return True;
+ }
+
return False;
# undef INSN
}