Implement: orr_{8h,4h}_imm8_shifted, orr_{4s,2s}_imm8_shifted,
bic_{8h,4h}_imm8_shifted, bic_{4s,2s}_imm8_shifted, cls_std6_std6,
cm{eq,ge,gt,hi,hs,tst}_d_d_d, cm{ge,gt,le,lt}_d_d_zero,
cnt_{16,8}b_{16,8}b
git-svn-id: svn://svn.valgrind.org/vex/trunk@2879 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index c61d4f2..63186fe 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -5993,6 +5993,8 @@
{
/* 31 28 18 15 11 9 4
0q op 01111 00000 abc cmode 01 defgh d
+ Decode fields: q,op,cmode
+ Bit 11 is really "o2", but it is always zero.
*/
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
if (INSN(31,31) != 0
@@ -6006,24 +6008,71 @@
UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
UInt dd = INSN(4,0);
- /* -------- {FMOV,MOVI} (vector, immediate) -------- */
- /* Allowable op:cmode
- FMOV = 1:1111
- MOVI = 0:xx00, 0:0010, 1:0x00, 1:10x0, 1:110x, x:1110,
- */
ULong imm64lo = 0;
UInt op_cmode = (bitOP << 4) | cmode;
Bool ok = False;
+ Bool isORR = False;
+ Bool isBIC = False;
switch (op_cmode) {
+ /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
case BITS5(1,1,1,1,1): // 1:1111
+ ok = bitQ == 1; break;
+
+ /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
+ /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0): // 0:0x00
+
+ /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,1,0): // 0:0010
- case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
+
+ /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
+ /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
+ case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:1x00
+
+ /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
+ /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
+
+ /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
+ /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
+
+ /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
+ /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
+
+ /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
+ /* -------- 1,1,1110 MOVI 64-bit vector -------- */
+ /* -------- x,0,1110 MOVI 8-bit -------- */
case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
ok = True; break;
+
+ /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
+ /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
+ case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
+ ok = True; isORR = True; break;
+
+ /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
+ /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
+ case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
+ ok = True; isBIC = True; break;
+
+ /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
+ /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
+ /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
+ /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
+ case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
+ case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
+ ok = True; isORR = True; break;
+
+ /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
+ /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
+ /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
+ /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
+ case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
+ case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
+ ok = True; isBIC = True; break;
+
default:
break;
}
@@ -6031,9 +6080,29 @@
ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
}
if (ok) {
- ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
- putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
- DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
+ if (isORR || isBIC) {
+ ULong inv
+ = isORR ? 0ULL : ~0ULL;
+ IRExpr* immV128
+ = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
+ IRExpr* res
+ = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
+            const HChar* nm = isORR ? "orr" : "bic";
+            if (bitQ == 0) {
+               putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
+               DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
+            } else {
+               putQReg128(dd, res);
+               DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
+                   nameQReg128(dd), imm64lo, imm64lo);
+            }
+ } else {
+ ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
+ IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo));
+ putQReg128(dd, immV128);
+ DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
+ }
return True;
}
/* else fall through */
@@ -6168,6 +6237,40 @@
UInt dd = INSN(4,0);
vassert(size < 4);
+ if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
+ /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
+ /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
+ Bool isGT = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = getQReg128(mm);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res,
+ isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
+ : binop(Iop_CmpGT64Ux2, argL, argR));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
+ if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
+ /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
+ /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
+ Bool isGE = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = getQReg128(mm);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res,
+ isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
+ : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
/* -------- 0,11,10000 ADD d_d_d -------- */
/* -------- 1,11,10000 SUB d_d_d -------- */
@@ -6184,6 +6287,25 @@
return True;
}
+ if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
+ /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
+ /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
+ Bool isEQ = bitU == 1;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = getQReg128(mm);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res,
+ isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
+ : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
+ binop(Iop_AndV128, argL, argR),
+ mkV128(0x0000))));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
/* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
IRType ity = size == X11 ? Ity_F64 : Ity_F32;
@@ -6226,12 +6348,41 @@
UInt dd = INSN(4,0);
vassert(size < 4);
- if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,0,1)) {
- /* -------- 0,11,01001 CMEQ d_d_#0 -------- */
+ if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
+ /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
+ /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
+ Bool isGT = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = mkV128(0x0000);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
+ : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
+ return True;
+ }
+
+ if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
+ /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
+ /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
+ Bool isEQ = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = mkV128(0x0000);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
+ : unop(Iop_NotV128,
+ binop(Iop_CmpGT64Sx2, argL, argR)));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
+ return True;
+ }
+
+ if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
+ /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
putQReg128(dd, unop(Iop_ZeroHI64ofV128,
- binop(Iop_CmpEQ64x2, getQReg128(nn),
- mkV128(0x0000))));
- DIP("cmeq d%u, d%u, #0\n", dd, nn);
+ binop(Iop_CmpGT64Sx2, mkV128(0x0000),
+ getQReg128(nn))));
+ DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
return True;
}
@@ -7002,6 +7153,36 @@
UInt dd = INSN(4,0);
vassert(size < 4);
+ if (opcode == BITS5(0,0,1,0,0)) {
+ /* -------- 0,xx,00100: CLS std6_std6 -------- */
+ /* -------- 1,xx,00100: CLZ std6_std6 -------- */
+ if (size == X11) return False; // no 1d or 2d cases
+ const IROp opsCLS[3] = { Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4 };
+ const IROp opsCLZ[3] = { Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4 };
+ Bool isCLZ = bitU == 1;
+ IRTemp res = newTemp(Ity_V128);
+ vassert(size <= 2);
+ assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
+ putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+ : mkexpr(res));
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
+ if (bitU == 0 && size == X00 && opcode == BITS5(0,0,1,0,1)) {
+ /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, unop(Iop_Cnt8x16, getQReg128(nn)));
+ putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+ : mkexpr(res));
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s\n", "cnt",
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
if (opcode == BITS5(0,1,0,0,0)) {
/* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
/* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index 5c5988a..d744539 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -929,11 +929,18 @@
case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
- case ARM64vecu_ABS64x2: *nm = "abs"; *ar = "2d"; return;
- case ARM64vecu_ABS32x4: *nm = "abs"; *ar = "4s"; return;
- case ARM64vecu_ABS16x8: *nm = "abs"; *ar = "8h"; return;
- case ARM64vecu_ABS8x16: *nm = "abs"; *ar = "16b"; return;
case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
+ case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
+ case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
+ case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
+ case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
+ case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
+ case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
+ case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
+ case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
+ case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
+ case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
+ case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
default: vpanic("showARM64VecUnaryOp");
}
}
@@ -3413,6 +3420,7 @@
#define X001111 BITS8(0,0, 0,0,1,1,1,1)
#define X010000 BITS8(0,0, 0,1,0,0,0,0)
#define X010001 BITS8(0,0, 0,1,0,0,0,1)
+#define X010010 BITS8(0,0, 0,1,0,0,1,0)
#define X010101 BITS8(0,0, 0,1,0,1,0,1)
#define X010110 BITS8(0,0, 0,1,0,1,1,0)
#define X011001 BITS8(0,0, 0,1,1,0,0,1)
@@ -5319,6 +5327,16 @@
010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
+
+ 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
+ 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
+ 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
+
+ 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
+ 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
+ 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
+
+ 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
*/
UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
@@ -5350,6 +5368,27 @@
case ARM64vecu_ABS8x16:
*p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
break;
+ case ARM64vecu_CLS32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLS16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLS8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLZ32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLZ16x8:
+ *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLZ8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CNT8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
+ break;
default:
goto bad;
}
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index 38b2910..9b8491e 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -356,6 +356,9 @@
ARM64vecu_NOT,
ARM64vecu_ABS64x2, ARM64vecu_ABS32x4,
ARM64vecu_ABS16x8, ARM64vecu_ABS8x16,
+ ARM64vecu_CLS32x4, ARM64vecu_CLS16x8, ARM64vecu_CLS8x16,
+ ARM64vecu_CLZ32x4, ARM64vecu_CLZ16x8, ARM64vecu_CLZ8x16,
+ ARM64vecu_CNT8x16,
ARM64vecu_INVALID
}
ARM64VecUnaryOp;
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index d12c72d..9aa0337 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -4410,14 +4410,13 @@
/* Other cases */
switch (e->Iex.Unop.op) {
case Iop_NotV128:
- case Iop_Abs64Fx2:
- case Iop_Abs32Fx4:
- case Iop_Neg64Fx2:
- case Iop_Neg32Fx4:
- case Iop_Abs64x2:
- case Iop_Abs32x4:
- case Iop_Abs16x8:
- case Iop_Abs8x16:
+ case Iop_Abs64Fx2: case Iop_Abs32Fx4:
+ case Iop_Neg64Fx2: case Iop_Neg32Fx4:
+ case Iop_Abs64x2: case Iop_Abs32x4:
+ case Iop_Abs16x8: case Iop_Abs8x16:
+ case Iop_Cls32Sx4: case Iop_Cls16Sx8: case Iop_Cls8Sx16:
+ case Iop_Clz32Sx4: case Iop_Clz16Sx8: case Iop_Clz8Sx16:
+ case Iop_Cnt8x16:
{
HReg res = newVRegV(env);
HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
@@ -4432,6 +4431,13 @@
case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
+ case Iop_Cls32Sx4: op = ARM64vecu_CLS32x4; break;
+ case Iop_Cls16Sx8: op = ARM64vecu_CLS16x8; break;
+ case Iop_Cls8Sx16: op = ARM64vecu_CLS8x16; break;
+ case Iop_Clz32Sx4: op = ARM64vecu_CLZ32x4; break;
+ case Iop_Clz16Sx8: op = ARM64vecu_CLZ16x8; break;
+ case Iop_Clz8Sx16: op = ARM64vecu_CLZ8x16; break;
+ case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
default: vassert(0);
}
addInstr(env, ARM64Instr_VUnaryV(op, res, arg));