arm64: implement pmull{2}.
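
PMULL{2} multiplies corresponding 8-bit lanes as polynomials over
GF(2) (a carry-less multiply) and widens each product to 16 bits,
taking the lanes from either the lower (PMULL) or upper (PMULL2)
half of the sources.  For reference, a minimal sketch of the
per-lane operation follows; pmull_lane is an illustrative name,
not part of this patch.

   #include <stdint.h>

   /* Carry-less 8x8 -> 16 multiply: partial products are combined
      with XOR instead of ADD, so no carries propagate. */
   static uint16_t pmull_lane ( uint8_t a, uint8_t b )
   {
      uint16_t acc = 0;
      for (int i = 0; i < 8; i++)
         if (b & (1 << i))
            acc ^= (uint16_t)a << i;
      return acc;
   }
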
git-svn-id: svn://svn.valgrind.org/vex/trunk@2888 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 6b73b1a..84fac0e 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -5609,6 +5609,24 @@
}
+/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
+   an op which takes two I64s and produces a V128.  That is, a widening
+   operator.  Generate IR which applies |opI64x2toV128| to either the
+   lower (if |is2| is False) or upper (if |is2| is True) halves of
+   |argL| and |argR|, and return the value in a new IRTemp.
+*/
+static
+IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
+                                   IRExpr* argL, IRExpr* argR )
+{
+   IRTemp res   = newTemp(Ity_V128);
+   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
+   assign(res, binop(opI64x2toV128, unop(slice, argL),
+                                    unop(slice, argR)));
+   return res;
+}
+
+
/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
and the upper can contain any value -- it is ignored. If |is2| is False,
generate IR to put |new64| in the lower half of vector reg |dd| and zero
@@ -6938,6 +6956,22 @@
return True;
}
+   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
+      /* -------- 0,1110 PMULL{2} -------- */
+      /* Widens, and size refers to the narrow (source) lanes. */
+      if (size != X00) return False;
+      IRTemp res
+         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
+                                     getQReg128(nn), getQReg128(mm));
+      putQReg128(dd, mkexpr(res));
+      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
+      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
+      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
+          nameQReg128(dd), arrWide,
+          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
+      return True;
+   }
+
return False;
# undef INSN
}
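
The helper math_BINARY_WIDENING_V128 added above is not specific to
PMULL: any IROp of type (I64, I64) -> V128 fits.  As a sketch of the
intended reuse (a hypothetical decode fragment, not part of this
patch, using the existing unsigned widening multiply Iop_Mull8Ux8),
a UMULL{2} Vd.8h, Vn.8b, Vm.8b case would look the same:

   IRTemp res
      = math_BINARY_WIDENING_V128(is2, Iop_Mull8Ux8,
                                  getQReg128(nn), getQReg128(mm));
   putQReg128(dd, mkexpr(res));
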
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index 9efaed1..dbb485d 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -922,6 +922,7 @@
case ARM64vecb_ZIP216x8: *nm = "zip2"; *ar = "8h"; return;
case ARM64vecb_ZIP28x16: *nm = "zip2"; *ar = "16b"; return;
case ARM64vecb_PMUL8x16: *nm = "pmul"; *ar = "16b"; return;
+ case ARM64vecb_PMULL8x8: *nm = "pmull"; *ar = "8hbb"; return;
default: vpanic("showARM64VecBinOp");
}
}
@@ -5126,6 +5127,8 @@
010 01110 00 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
+
+ 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
*/
UInt vD = qregNo(i->ARM64in.VBinV.dst);
UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5353,6 +5356,10 @@
*p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
break;
+      case ARM64vecb_PMULL8x8:
+         *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
+         break;
+
default:
goto bad;
}
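
For reference, X_3_8_5_6_5_5 packs six instruction fields of widths
3, 8, 5, 6, 5 and 5 bits (32 in total), most significant field first.
A simplified sketch of its effect (the real helper in this file also
range-checks each field):

   static uint32_t X_3_8_5_6_5_5 ( uint32_t f1, uint32_t f2,
                                   uint32_t f3, uint32_t f4,
                                   uint32_t f5, uint32_t f6 )
   {
      return (f1 << 29) | (f2 << 21) | (f3 << 16)
             | (f4 << 10) | (f5 << 5) | f6;
   }

With m = n = d = 0, the PMULL case above packs to 0x0E20E000, the
architectural encoding of pmull v0.8h, v0.8b, v0.8b.
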
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index c00bb3d..12b0980 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -345,6 +345,7 @@
ARM64vecb_ZIP18x16, ARM64vecb_ZIP232x4,
ARM64vecb_ZIP216x8, ARM64vecb_ZIP28x16,
ARM64vecb_PMUL8x16,
+ ARM64vecb_PMULL8x8,
ARM64vecb_INVALID
}
ARM64VecBinOp;
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index 7916ce2..d640a0d 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -5671,6 +5671,23 @@
break;
}
+      case Iop_PolynomialMull8x8: {
+         /* The I64 args arrive in integer registers.  Copy each into
+            both halves of a vector register: PMULL reads only the low
+            64 bits of each source, so the duplicate in the upper half
+            is ignored. */
+         HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         HReg vSrcL = newVRegV(env);
+         HReg vSrcR = newVRegV(env);
+         HReg dst   = newVRegV(env);
+         addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
+         addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
+         addInstr(env, ARM64Instr_VBinV(ARM64vecb_PMULL8x8,
+                                        dst, vSrcL, vSrcR));
+         return dst;
+      }
+
//ZZ case Iop_CmpGT8Ux16:
//ZZ case Iop_CmpGT16Ux8:
//ZZ case Iop_CmpGT32Ux4: {
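
Taken together: the selector copies each I64 operand into both halves
of a vector register, and pmull then reads only bits 63:0 of each
source, so the duplicated upper half is ignored.  A self-contained
reference for the lower-half semantics the selected code computes
(illustrative C, not VEX API):

   #include <stdint.h>

   /* pmull vD.8h, vN.8b, vM.8b: eight independent carry-less
      8x8 -> 16 multiplies of the low 64 bits of each source. */
   static void pmull_ref ( uint16_t res[8],
                           uint64_t nLo64, uint64_t mLo64 )
   {
      for (int lane = 0; lane < 8; lane++) {
         uint8_t  a   = (nLo64 >> (8 * lane)) & 0xFF;
         uint8_t  b   = (mLo64 >> (8 * lane)) & 0xFF;
         uint16_t acc = 0;
         for (int i = 0; i < 8; i++)
            if (b & (1 << i))
               acc ^= (uint16_t)a << i;
         res[lane] = acc;
      }
   }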