arm64: implement pmull{2}.


git-svn-id: svn://svn.valgrind.org/vex/trunk@2888 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 6b73b1a..84fac0e 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -5609,6 +5609,24 @@
 }
 
 
+/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
+   an op which takes two I64s and produces a V128.  That is, a widening
+   operator.  Generate IR which applies |opI64x2toV128| to either the
+   lower (if |is2| is False) or upper (if |is2| is True) halves of
+   |argL| and |argR|, and return the value in a new IRTemp.
+*/
+static
+IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
+                                   IRExpr* argL, IRExpr* argR )
+{
+   IRTemp res   = newTemp(Ity_V128);
+   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
+   assign(res, binop(opI64x2toV128, unop(slice, argL),
+                                    unop(slice, argR)));
+   return res;
+}
+
+
 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
    and the upper can contain any value -- it is ignored.  If |is2| is False,
    generate IR to put |new64| in the lower half of vector reg |dd| and zero
@@ -6938,6 +6956,22 @@
       return True;
    }
 
+   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
+      /* -------- 0,1110  PMULL{2} -------- */
+      /* Widens, and size refers to the narrowed lanes. */
+      if (size != X00) return False;
+      IRTemp res
+         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
+                                     getQReg128(nn), getQReg128(mm));
+      putQReg128(dd, mkexpr(res));
+      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
+      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
+      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
+          nameQReg128(dd), arrWide,
+          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
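For reference, the per-lane operation behind Iop_PolynomialMull8x8 is a carry-less (GF(2)[x]) multiply: partial products are combined with XOR rather than ADD. The standalone C model below is a sketch for checking expected results, not part of VEX; pmull_lane is a made-up name.

#include <stdint.h>
#include <stdio.h>

/* Carry-less multiply of two 8-bit polynomials over GF(2), giving a
   16-bit polynomial.  PMULL applies this to the 8 lower lane pairs;
   PMULL2 does the same to the 8 upper lane pairs. */
static uint16_t pmull_lane ( uint8_t a, uint8_t b )
{
   uint16_t acc = 0;
   for (int i = 0; i < 8; i++)
      if (b & (1 << i))
         acc ^= (uint16_t)a << i;   /* XOR, not ADD: no carries */
   return acc;
}

int main ( void )
{
   /* (x+1)*(x+1) = x^2+1 over GF(2): 0x03 * 0x03 = 0x05 */
   printf("%04x\n", pmull_lane(0x03, 0x03));
   return 0;
}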
diff --git a/priv/host_arm64_defs.c b/priv/host_arm64_defs.c
index 9efaed1..dbb485d 100644
--- a/priv/host_arm64_defs.c
+++ b/priv/host_arm64_defs.c
@@ -922,6 +922,7 @@
       case ARM64vecb_ZIP216x8:  *nm = "zip2";  *ar = "8h";  return;
       case ARM64vecb_ZIP28x16:  *nm = "zip2";  *ar = "16b"; return;
       case ARM64vecb_PMUL8x16:  *nm = "pmul";  *ar = "16b"; return;
+      case ARM64vecb_PMULL8x8:  *nm = "pmull"; *ar = "8hb"; return;
       default: vpanic("showARM64VecBinOp");
    }
 }
@@ -5126,6 +5127,8 @@
             010 01110 10 0 m  011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
 
             011 01110 00 1 m  100111 n d   PMUL Vd.16b, Vn.16b, Vm.16b
+
+            000 01110 00 1 m  111000 n d   PMULL Vd.8h, Vn.8b, Vm.8b
          */
          UInt vD = qregNo(i->ARM64in.VBinV.dst);
          UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5353,6 +5356,10 @@
                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
                break;
 
+            case ARM64vecb_PMULL8x8:
+               *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
+               break;
+
             default:
                goto bad;
          }
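The emitted word can be cross-checked against the encoding comment above. The sketch below re-states the field packing, assuming the 3+8+5+6+5+5 = 32-bit split that the name X_3_8_5_6_5_5 suggests; pack_3_8_5_6_5_5 is a stand-in written for this example, not the VEX helper itself.

#include <assert.h>
#include <stdint.h>

/* Pack six fields, MSB first, into one 32-bit instruction word. */
static uint32_t pack_3_8_5_6_5_5 ( uint32_t f1, uint32_t f2, uint32_t f3,
                                   uint32_t f4, uint32_t f5, uint32_t f6 )
{
   return (f1 << 29) | (f2 << 21) | (f3 << 16)
        | (f4 << 10) | (f5 << 5)  | f6;
}

int main ( void )
{
   /* 000 01110001 mmmmm 111000 nnnnn ddddd, with m = n = d = 0 */
   uint32_t insn = pack_3_8_5_6_5_5(0x0 /*X000*/, 0x71 /*X01110001*/,
                                    0 /*vM*/, 0x38 /*X111000*/,
                                    0 /*vN*/, 0 /*vD*/);
   assert(insn == 0x0E20E000u);   /* PMULL V0.8H, V0.8B, V0.8B */
   return 0;
}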
diff --git a/priv/host_arm64_defs.h b/priv/host_arm64_defs.h
index c00bb3d..12b0980 100644
--- a/priv/host_arm64_defs.h
+++ b/priv/host_arm64_defs.h
@@ -345,6 +345,7 @@
       ARM64vecb_ZIP18x16,    ARM64vecb_ZIP232x4,
       ARM64vecb_ZIP216x8,    ARM64vecb_ZIP28x16,
                              ARM64vecb_PMUL8x16,
+                             ARM64vecb_PMULL8x8,
       ARM64vecb_INVALID
    }
    ARM64VecBinOp;
diff --git a/priv/host_arm64_isel.c b/priv/host_arm64_isel.c
index 7916ce2..d640a0d 100644
--- a/priv/host_arm64_isel.c
+++ b/priv/host_arm64_isel.c
@@ -5671,6 +5671,19 @@
             break;
          }
 
+         case Iop_PolynomialMull8x8: {
+            HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+            HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+            HReg vSrcL = newVRegV(env);
+            HReg vSrcR = newVRegV(env);
+            HReg dst   = newVRegV(env);
+            addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
+            addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
+            addInstr(env, ARM64Instr_VBinV(ARM64vecb_PMULL8x8,
+                                           dst, vSrcL, vSrcR));
+            return dst;
+         }
+
 //ZZ          case Iop_CmpGT8Ux16:
 //ZZ          case Iop_CmpGT16Ux8:
 //ZZ          case Iop_CmpGT32Ux4: {
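As a cross-check on the selected sequence, the same computation can be written with ACLE NEON intrinsics. This is a reference model of what the Iop computes, not what VEX emits: VEX materialises each I64 operand into a Q register via VQfromXX (duplicating it into both halves, of which PMULL8x8 reads only the low 64 bits) and then issues the PMULL8x8 VBinV.

#include <arm_neon.h>

/* Each I64 operand is viewed as 8 polynomial bytes; vmull_p8 widens
   the 8 carry-less products to 8 x 16-bit lanes (a full V128). */
static poly16x8_t polynomial_mull_8x8 ( uint64_t x, uint64_t y )
{
   return vmull_p8(vcreate_p8(x), vcreate_p8(y));
}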