Implement a few more vector AArch64 insns:
LD1 {vT.8h}, [xN|SP], #16
LD1 {vT.16b}, [xN|SP], #16
ST1 {vT.4h}, [xN|SP], #8
MUL Vd.T, Vn.T, Vm.T
PMUL Vd.T, Vn.T, Vm.T (front end only)
MLA Vd.T, Vn.T, Vm.T
MLS Vd.T, Vn.T, Vm.T
UMOV Xd/Wd, Vn.Ts[index]
SMOV Xd/Wd, Vn.Ts[index]
git-svn-id: svn://svn.valgrind.org/vex/trunk@2813 8f6e269a-dfd6-0310-a8e1-e2731360e62c
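
For readers skimming the log: each load/store case below is decoded by
masking off the variable register fields and comparing the fixed bits.
A standalone sketch of that scheme (illustrative only, not the VEX code
itself; the mask and opcode values are the ones from the diff):

   /* LD1 {vT.8h}, [xN|SP], #16: top 22 bits fixed, low 10 bits are
      Rn (bits 9:5) and Vt (bits 4:0). */
   #include <stdio.h>

   int main(void)
   {
      unsigned int insn = 0x4CDF7425u;           /* ld1 {v5.8h}, [x1], #16 */
      if ((insn & 0xFFFFFC00u) == 0x4CDF7400u) {
         unsigned int rN = (insn >> 5) & 0x1Fu;  /* base register */
         unsigned int vT = insn & 0x1Fu;         /* vector register */
         printf("ld1 {v%u.8h}, [x%u], #16\n", vT, rN);
      }
      return 0;
   }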
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index ababb89..e30103c 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -3995,6 +3995,9 @@
0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
+ 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
+ ..
+ 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
Note that #16 is implied and cannot be any other value.
FIXME does this assume that the host is little endian?
*/
@@ -4003,6 +4006,9 @@
|| (insn & 0xFFFFFC00) == 0x4C9F7800 // ST1 {vT.4s}, [xN|SP], #16
|| (insn & 0xFFFFFC00) == 0x4CDF7800 // LD1 {vT.4s}, [xN|SP], #16
|| (insn & 0xFFFFFC00) == 0x4C9F7400 // ST1 {vT.8h}, [xN|SP], #16
+ || (insn & 0xFFFFFC00) == 0x4CDF7400 // LD1 {vT.8h}, [xN|SP], #16
+ /* */
+ || (insn & 0xFFFFFC00) == 0x4CDF7000 // LD1 {vT.16b}, [xN|SP], #16
) {
Bool isLD = INSN(22,22) == 1;
UInt rN = INSN(9,5);
@@ -4025,10 +4031,12 @@
/*
0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
+ 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
Note that #8 is implied and cannot be any other value.
FIXME does this assume that the host is little endian?
*/
if ( (insn & 0xFFFFFC00) == 0x0C9F7800 // st1 {vT.2s}, [xN|SP], #8
+ || (insn & 0xFFFFFC00) == 0x0C9F7400 // st1 {vT.4h}, [xN|SP], #8
) {
UInt rN = INSN(9,5);
UInt vT = INSN(4,0);
@@ -5190,13 +5198,13 @@
const HChar* arrSpec = "";
Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
if (ok) {
- const IROp opADD[4]
+ const IROp opsADD[4]
= { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
- const IROp opSUB[4]
+ const IROp opsSUB[4]
= { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
vassert(szBlg2 < 4);
- IROp op = isSUB ? opSUB[szBlg2] : opADD[szBlg2];
- IRTemp t = newTemp(Ity_V128);
+ IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
+ IRTemp t = newTemp(Ity_V128);
assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
: mkexpr(t));
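
Aside: every non-Q arrangement in these blocks must zero the upper half
of the destination Q register, which is what Iop_ZeroHI64ofV128
expresses. A tiny model, using an illustrative two-halves struct rather
than the real VEX V128 type:

   #include <stdint.h>

   typedef struct { uint64_t lo, hi; } V128Model;  /* hypothetical */

   static V128Model zeroHI64ofV128(V128Model v)
   {
      v.hi = 0;  /* 64-bit-wide results clear bits 127:64 of the Q reg */
      return v;
   }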
@@ -5232,6 +5240,65 @@
return True;
}
+ /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
+ /* 31 28 23 21 20 15 9 4
+ 0q0 01110 size 1 m 100111 n d MUL Vd.T, Vn.T, Vm.T B/H/S only
+ 0q1 01110 size 1 m 100111 n d PMUL Vd.T, Vn.T, Vm.T B only
+ 0q0 01110 size 1 m 100101 n d MLA Vd.T, Vn.T, Vm.T B/H/S only
+ 0q1 01110 size 1 m 100101 n d MLS Vd.T, Vn.T, Vm.T B/H/S only
+ */
+ if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
+ && INSN(21,21) == 1
+ && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
+ Bool isQ = INSN(30,30) == 1;
+ UInt szBlg2 = INSN(23,22);
+ UInt bit29 = INSN(29,29);
+ UInt mm = INSN(20,16);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ Bool isMLAS = INSN(11,11) == 0;
+ const IROp opsADD[4]
+ = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
+ const IROp opsSUB[4]
+ = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
+ const IROp opsMUL[4]
+ = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
+ const IROp opsPMUL[4]
+ = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
+ /* Set opMUL and, if necessary, opACC. A result value of
+ Iop_INVALID for opMUL indicates that the instruction is
+ invalid. */
+ Bool zeroHI = False;
+ const HChar* arrSpec = "";
+ Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
+ vassert(szBlg2 < 4);
+ IROp opACC = Iop_INVALID;
+ IROp opMUL = Iop_INVALID;
+ if (ok) {
+ opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
+ : opsMUL[szBlg2];
+ opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
+ : Iop_INVALID;
+ }
+ if (ok && opMUL != Iop_INVALID) {
+ IRTemp t1 = newTemp(Ity_V128);
+ assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
+ IRTemp t2 = newTemp(Ity_V128);
+ assign(t2, opACC == Iop_INVALID
+ ? mkexpr(t1)
+ : binop(opACC, getQReg128(dd), mkexpr(t1)));
+ putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
+ : mkexpr(t2));
+ const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
+ : (bit29 == 1 ? "pmul" : "mul");
+ DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arrSpec,
+ nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
+ return True;
+ }
+ /* else fall through */
+ }
+
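
As a semantic cross-check for the block above: per lane, t1 = Vn * Vm,
and for MLA/MLS the accumulate step adds or subtracts t1 into Vd. A
minimal C model for the 8h arrangement (a sketch under that reading;
the B and S lane widths follow the same pattern):

   #include <stdint.h>

   static void mla_mls_8h(uint16_t d[8], const uint16_t n[8],
                          const uint16_t m[8], int isMLS)
   {
      int i;
      for (i = 0; i < 8; i++) {
         /* low 16 bits of the product, i.e. multiply mod 2^16 */
         uint16_t prod = (uint16_t)((uint32_t)n[i] * m[i]);
         d[i] = isMLS ? (uint16_t)(d[i] - prod)   /* mls */
                      : (uint16_t)(d[i] + prod);  /* mla */
      }
   }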
/* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
/* 31 28 23 21 20 15 9 4
0q0 01110 size 1 m 011011 n d SMIN Vd.T, Vn.T, Vm.T
@@ -5487,6 +5554,104 @@
/* else fall through */
}
+ /* ---------------------- {S,U}MOV ---------------------- */
+ /* 31 28 20 15 9 4
+ 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
+ 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
+ dest is Xd when q==1, Wd when q==0
+ UMOV:
+ Ts,index,ops = case q:imm5 of
+ 0:xxxx1 -> B, xxxx, 8Uto64
+ 1:xxxx1 -> invalid
+ 0:xxx10 -> H, xxx, 16Uto64
+ 1:xxx10 -> invalid
+ 0:xx100 -> S, xx, 32Uto64
+ 1:xx100 -> invalid
+ 1:x1000 -> D, x, copy64
+ other -> invalid
+ SMOV:
+ Ts,index,ops = case q:imm5 of
+ 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
+ 1:xxxx1 -> B, xxxx, 8Sto64
+ 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
+ 1:xxx10 -> H, xxx, 16Sto64
+ 0:xx100 -> invalid
+ 1:xx100 -> S, xx, 32Sto64
+ 1:x1000 -> invalid
+ other -> invalid
+ */
+ if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
+ && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
+ UInt bitQ = INSN(30,30) == 1;
+ UInt imm5 = INSN(20,16);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ Bool isU = INSN(12,12) == 1;
+ const HChar* arTs = "??";
+ UInt laneNo = 16; /* invalid */
+ // Setting 'res' to non-NULL determines valid/invalid
+ IRExpr* res = NULL;
+ if (!bitQ && (imm5 & 1)) { // 0:xxxx1
+ laneNo = (imm5 >> 1) & 15;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
+ res = isU ? unop(Iop_8Uto64, lane)
+ : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
+ arTs = "b";
+ }
+ else if (bitQ && (imm5 & 1)) { // 1:xxxx1
+ laneNo = (imm5 >> 1) & 15;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
+ res = isU ? NULL
+ : unop(Iop_8Sto64, lane);
+ arTs = "b";
+ }
+ else if (!bitQ && (imm5 & 2)) { // 0:xxx10
+ laneNo = (imm5 >> 2) & 7;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
+ res = isU ? unop(Iop_16Uto64, lane)
+ : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
+ arTs = "h";
+ }
+ else if (bitQ && (imm5 & 2)) { // 1:xxx10
+ laneNo = (imm5 >> 2) & 7;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
+ res = isU ? NULL
+ : unop(Iop_16Sto64, lane);
+ arTs = "h";
+ }
+ else if (!bitQ && (imm5 & 4)) { // 0:xx100
+ laneNo = (imm5 >> 3) & 3;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
+ res = isU ? unop(Iop_32Uto64, lane)
+ : NULL;
+ arTs = "s";
+ }
+ else if (bitQ && (imm5 & 4)) { // 1:xx100
+ laneNo = (imm5 >> 3) & 3;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
+ res = isU ? NULL
+ : unop(Iop_32Sto64, lane);
+ arTs = "s";
+ }
+ else if (bitQ && (imm5 & 8)) { // 1:x1000
+ laneNo = (imm5 >> 4) & 1;
+ IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
+ res = isU ? lane
+ : NULL;
+ arTs = "d";
+ }
+ /* */
+ if (res) {
+ vassert(laneNo < 16);
+ putIReg64orZR(dd, res);
+ DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
+ nameIRegOrZR(bitQ == 1, dd),
+ nameQReg128(nn), arTs, laneNo);
+ return True;
+ }
+ /* else fall through */
+ }
+
/* FIXME Temporary hacks to get through ld.so FIXME */
/* ------------------ movi vD.4s, #0x0 ------------------ */
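
The UMOV/SMOV cases added above reduce to: select a lane, then
zero-extend (UMOV) or sign-extend (SMOV) it into the destination. A
compact model of the H-lane, Xd-destination pair (a sketch, with the
corresponding IROps noted; the Wd forms differ only in going via 32
bits):

   #include <stdint.h>

   static uint64_t umov_h(const uint16_t lanes[8], unsigned idx)
   {
      return (uint64_t)lanes[idx];                    /* Iop_16Uto64 */
   }

   static uint64_t smov_h(const uint16_t lanes[8], unsigned idx)
   {
      return (uint64_t)(int64_t)(int16_t)lanes[idx];  /* Iop_16Sto64 */
   }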
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index 81fca08..8111abb 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -857,6 +857,8 @@
case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
+ case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
+ case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
case ARM64vecb_FADD64x2: *nm = "fadd"; *ar = "2d"; return;
case ARM64vecb_FSUB64x2: *nm = "fsub"; *ar = "2d"; return;
case ARM64vecb_FMUL64x2: *nm = "fmul"; *ar = "2d"; return;
@@ -869,6 +871,10 @@
case ARM64vecb_UMAX16x8: *nm = "umax"; *ar = "8h"; return;
case ARM64vecb_UMIN32x4: *nm = "umin"; *ar = "4s"; return;
case ARM64vecb_UMIN16x8: *nm = "umin"; *ar = "8h"; return;
+ case ARM64vecb_SMAX32x4: *nm = "smax"; *ar = "4s"; return;
+ case ARM64vecb_SMAX16x8: *nm = "smax"; *ar = "8h"; return;
+ case ARM64vecb_SMIN32x4: *nm = "smin"; *ar = "4s"; return;
+ case ARM64vecb_SMIN16x8: *nm = "smin"; *ar = "8h"; return;
case ARM64vecb_AND: *nm = "and "; *ar = "all"; return;
case ARM64vecb_ORR: *nm = "orr "; *ar = "all"; return;
default: vpanic("showARM64VecBinOp");
@@ -3239,6 +3245,7 @@
#define X100100 BITS8(0,0, 1,0,0,1,0,0)
#define X100101 BITS8(0,0, 1,0,0,1,0,1)
#define X100110 BITS8(0,0, 1,0,0,1,1,0)
+#define X100111 BITS8(0,0, 1,0,0,1,1,1)
#define X110000 BITS8(0,0, 1,1,0,0,0,0)
#define X110001 BITS8(0,0, 1,1,0,0,0,1)
#define X110101 BITS8(0,0, 1,1,0,1,0,1)
@@ -4757,21 +4764,35 @@
/* 31 23 20 15 9 4
010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
+ 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
+
011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
+
+ 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
+ 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
+
010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
+
011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
+
011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
+
+ 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
+ 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
+ 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
+ 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
+
010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
*/
@@ -4782,7 +4803,12 @@
case ARM64vecb_ADD64x2:
*p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
break;
- // ADD32x4
+ case ARM64vecb_ADD32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
+ break;
+ case ARM64vecb_ADD16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
+ break;
case ARM64vecb_SUB64x2:
*p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
break;
@@ -4792,6 +4818,12 @@
case ARM64vecb_SUB16x8:
*p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
break;
+ case ARM64vecb_MUL32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
+ break;
+ case ARM64vecb_MUL16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
+ break;
case ARM64vecb_FADD64x2:
*p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
break;
@@ -4816,6 +4848,7 @@
case ARM64vecb_FDIV32x4:
*p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
break;
+
case ARM64vecb_UMAX32x4:
*p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
break;
@@ -4828,6 +4861,20 @@
case ARM64vecb_UMIN16x8:
*p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
break;
+
+ case ARM64vecb_SMAX32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
+ break;
+ case ARM64vecb_SMAX16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
+ break;
+ case ARM64vecb_SMIN32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
+ break;
+ case ARM64vecb_SMIN16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
+ break;
+
case ARM64vecb_ORR:
goto bad; //ATC
*p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
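
All the emitter cases above funnel through X_3_8_5_6_5_5. Assuming it
packs its six fields MSB-first with the widths spelled out in its name
(3+8+5+6+5+5 = 32 bits), the packing looks like this (the function here
is a stand-in for illustration, not the VEX helper itself):

   #include <assert.h>
   #include <stdint.h>

   static uint32_t pack_3_8_5_6_5_5(uint32_t f1, uint32_t f2, uint32_t f3,
                                    uint32_t f4, uint32_t f5, uint32_t f6)
   {
      assert(f1 < (1u<<3) && f2 < (1u<<8) && f3 < (1u<<5));
      assert(f4 < (1u<<6) && f5 < (1u<<5) && f6 < (1u<<5));
      return (f1 << 29) | (f2 << 21) | (f3 << 16)
           | (f4 << 10) | (f5 << 5)  | f6;
   }

Under that assumption the new MUL Vd.4s case,
X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD), yields 0x4EA09C00
plus the three register fields, which matches the architectural encoding
of MUL Vd.4s, Vn.4s, Vm.4s.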
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index 7b7e5bb..a152052 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -313,6 +313,8 @@
ARM64vecb_SUB64x2,
ARM64vecb_SUB32x4,
ARM64vecb_SUB16x8,
+ ARM64vecb_MUL32x4,
+ ARM64vecb_MUL16x8,
ARM64vecb_FADD64x2,
ARM64vecb_FSUB64x2,
ARM64vecb_FMUL64x2,
@@ -325,6 +327,10 @@
ARM64vecb_UMAX16x8,
ARM64vecb_UMIN32x4,
ARM64vecb_UMIN16x8,
+ ARM64vecb_SMAX32x4,
+ ARM64vecb_SMAX16x8,
+ ARM64vecb_SMIN32x4,
+ ARM64vecb_SMIN16x8,
ARM64vecb_AND,
ARM64vecb_ORR,
ARM64vecb_INVALID
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index 50bad4a..cb027f1 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -4871,10 +4871,18 @@
case Iop_Max16Ux8:
case Iop_Min32Ux4:
case Iop_Min16Ux8:
+ case Iop_Max32Sx4:
+ case Iop_Max16Sx8:
+ case Iop_Min32Sx4:
+ case Iop_Min16Sx8:
case Iop_Add64x2:
+ case Iop_Add32x4:
+ case Iop_Add16x8:
case Iop_Sub64x2:
case Iop_Sub32x4:
- case Iop_Sub16x8: {
+ case Iop_Sub16x8:
+ case Iop_Mul32x4:
+ case Iop_Mul16x8: {
HReg res = newVRegV(env);
HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
@@ -4884,10 +4892,18 @@
case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
+ case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
+ case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
+ case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
+ case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
+ case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
+ case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
+ case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
+ case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
default: vassert(0);
}
addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index 161fc13..38a3b18 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -1417,8 +1417,8 @@
Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
/* Doubling saturating multiplication (long) (I64, I64) -> V128 */
Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
- /* Plynomial multiplication treats it's arguments as coefficients of
- polynoms over {0, 1}. */
+ /* Polynomial multiplication treats its arguments as
+ coefficients of polynomials over {0, 1}. */
Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */
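
For reference alongside the comment fix above, the per-byte operation
behind Iop_PolynomialMul8x16 (and the PMUL front end earlier in this
commit) is carry-less multiplication: partial products combine with XOR
rather than addition, and PMUL keeps the low 8 bits of each result. A
small model:

   #include <stdint.h>

   static uint8_t pmul8(uint8_t a, uint8_t b)
   {
      uint8_t acc = 0;
      int i;
      for (i = 0; i < 8; i++)
         if (b & (1u << i))
            acc ^= (uint8_t)(a << i);  /* xor, not add: no carries */
      return acc;
   }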