blob: 892b43b3be75fc065d125bab987ebc667bf77e6d [file] [log] [blame]
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations. ---*/
/*--- mc_translate.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of MemCheck, a heavyweight Valgrind tool for
detecting memory errors.
Copyright (C) 2000-2013 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h" // For mc_include.h
#include "pub_tool_hashtable.h" // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"
#include "mc_include.h"
/* FIXMEs JRS 2011-June-16.
Check the interpretation for vector narrowing and widening ops,
particularly the saturating ones. I suspect they are either overly
pessimistic and/or wrong.
Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
saturating shifts): the interpretation is overly pessimistic.
See comments on the relevant cases below for details.
Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
both rounding and non-rounding variants): ditto
*/
/* This file implements the Memcheck instrumentation, and in
particular contains the core of its undefined value detection
machinery. For a comprehensive background of the terminology,
algorithms and rationale used herein, read:
Using Valgrind to detect undefined value errors with
bit-precision
Julian Seward and Nicholas Nethercote
2005 USENIX Annual Technical Conference (General Track),
Anaheim, CA, USA, April 10-15, 2005.
----
Here is as good a place as any to record exactly when V bits are and
should be checked, why, and what function is responsible.
Memcheck complains when an undefined value is used:
1. In the condition of a conditional branch. Because it could cause
incorrect control flow, and thus cause incorrect externally-visible
behaviour. [mc_translate.c:complainIfUndefined]
2. As an argument to a system call, or as the value that specifies
the system call number. Because it could cause an incorrect
externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
3. As the address in a load or store. Because it could cause an
incorrect value to be used later, which could cause externally-visible
behaviour (eg. via incorrect control flow or an incorrect system call
argument) [complainIfUndefined]
4. As the target address of a branch. Because it could cause incorrect
control flow. [complainIfUndefined]
5. As an argument to setenv, unsetenv, or putenv. Because it could put
an incorrect value into the external environment.
[mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
[complainIfUndefined]
7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
requested it. [in memcheck.h]
Memcheck also complains, but should not, when an undefined value is used:
8. As the shift value in certain SIMD shift operations (but not in the
standard integer shift operations). This inconsistency is due to
historical reasons. [complainIfUndefined]
Memcheck does not complain, but should, when an undefined value is used:
9. As an input to a client request. Because the client request may
affect the visible behaviour -- see bug #144362 for an example
involving the malloc replacements in vg_replace_malloc.c and
VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
isn't identified. That bug report also has some info on how to solve
the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
In practice, 1 and 2 account for the vast majority of cases.
*/
/* Generation of addr-definedness, addr-validity and
guard-definedness checks pertaining to loads and stores (Iex_Load,
Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
loads/stores) was re-checked 11 May 2013. */
/*------------------------------------------------------------*/
/*--- Forward decls ---*/
/*------------------------------------------------------------*/
/* Incomplete declaration, so that the prototypes below can mention the
instrumentation environment before struct _MCEnv is defined. */
struct _MCEnv;
/* Maps a value type to the same-sized type used to hold its V bits. */
static IRType shadowTypeV ( IRType ty );
/* The next two are defined further down the file. */
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
/* Builds an I128 expression made of two zero 64-bit halves. */
static IRExpr *i128_const_zero(void);
/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management. ---*/
/*------------------------------------------------------------*/
/* Carries info about a particular tmp. The tmp's number is not
recorded, as this is implied by (equal to) its index in the tmpMap
in MCEnv. The tmp's type is also not recorded, as this is present
in MCEnv.sb->tyenv.
When .kind is Orig, .shadowV and .shadowB may give the identities
of the temps currently holding the associated definedness (shadowV)
and origin (shadowB) values, or these may be IRTemp_INVALID if code
to compute such values has not yet been emitted.
When .kind is VSh or BSh then the tmp holds a V- or B- value,
and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
illogical for a shadow tmp itself to be shadowed.
*/
/* Classifies a tmp: Orig is a tmp that may itself be shadowed, VSh is
a tmp holding a V- (definedness) value, BSh is a tmp holding a B-
(origin) value. */
typedef
enum { Orig=1, VSh=2, BSh=3 }
TempKind;
/* One entry of MCEnv.tmpMap, describing a single tmp. */
typedef
struct {
/* Whether this tmp is an original, a V-shadow or a B-shadow. */
TempKind kind;
/* For Orig tmps: the tmp currently holding the definedness shadow,
or IRTemp_INVALID if none has been allocated yet. Always
IRTemp_INVALID for VSh/BSh tmps. */
IRTemp shadowV;
/* As shadowV, but for the origin shadow. */
IRTemp shadowB;
}
TempMapEnt;
/* Carries around state during memcheck instrumentation. */
typedef
struct _MCEnv {
/* MODIFIED: the superblock being constructed. IRStmts are
added. */
IRSB* sb;
/* When True, stmt() prints every statement as it is appended to
sb. */
Bool trace;
/* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
current kind and possibly shadow temps for each temp in the
IRSB being constructed. Note that it does not contain the
type of each tmp. If you want to know the type, look at the
relevant entry in sb->tyenv. It follows that at all times
during the instrumentation process, the valid indices for
tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
total number of Orig, V- and B- temps allocated so far.
The reason for this strange split (types in one place, all
other info in another) is that we need the types to be
attached to sb so as to make it possible to do
"typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
instrumentation process. */
XArray* /* of TempMapEnt */ tmpMap;
/* MODIFIED: indicates whether "bogus" literals have so far been
found. Starts off False, and may change to True. */
Bool bogusLiterals;
/* READONLY: indicates whether we should use expensive
interpretations of integer adds, since unfortunately LLVM
uses them to do ORs in some circumstances. Defaulted to True
on MacOS and False everywhere else. */
Bool useLLVMworkarounds;
/* READONLY: the guest layout. This indicates which parts of
the guest state should be regarded as 'always defined'. */
const VexGuestLayout* layout;
/* READONLY: the host word type. Needed for constructing
arguments of type 'HWord' to be passed to helper functions.
Ity_I32 or Ity_I64 only. */
IRType hWordTy;
}
MCEnv;
/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
demand), as they are encountered. This is for two reasons.
(1) (less important reason): Many original tmps are unused due to
initial IR optimisation, and we do not want to waste space in tables
tracking them.
Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
table indexed [0 .. n_types-1], which gives the current shadow for
each original tmp, or IRTemp_INVALID if none is so far assigned.
It is necessary to support making multiple assignments to a shadow
-- specifically, after testing a shadow for definedness, it needs
to be made defined. But IR's SSA property disallows this.
(2) (more important reason): Therefore, when a shadow needs to get
a new value, a new temporary is created, the value is assigned to
that, and the tmpMap is updated to reflect the new binding.
A corollary is that if the tmpMap maps a given tmp to
IRTemp_INVALID and we are hoping to read that shadow tmp, it means
there's a read-before-write error in the original tmps. The IR
sanity checker should catch all such anomalies, however.
*/
/* Allocate a fresh IRTemp of type 'ty' in mce->sb's type environment
   and append a matching entry of kind 'kind', with no shadows
   assigned, to mce->tmpMap.  The two tables are required to stay
   index-aligned, which is asserted.  Appending may resize
   mce->tmpMap, so any pointer previously obtained from
   VG_(indexXA)(mce->tmpMap, ..) must be re-fetched afterwards. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   TempMapEnt fresh;
   IRTemp     tmp;
   Word       newIx;

   /* Register the type with the superblock ... */
   tmp = newIRTemp(mce->sb->tyenv, ty);

   /* ... and record the bookkeeping entry at the same index. */
   fresh.shadowB = IRTemp_INVALID;
   fresh.shadowV = IRTemp_INVALID;
   fresh.kind    = kind;
   newIx = VG_(addToXA)( mce->tmpMap, &fresh );

   tl_assert(newIx == (Word)tmp);
   return tmp;
}
/* Return the V-shadow tmp currently bound to the original tmp 'orig',
   allocating and binding one first if there is none yet.  'orig'
   must be of kind Orig (asserted). */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   IRTemp      created;
   TempMapEnt* ent;

   /* No explicit bounds check on 'orig': VG_(indexXA) does it. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);

   /* Common case: a shadow already exists. */
   if (ent->shadowV != IRTemp_INVALID)
      return ent->shadowV;

   created = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );

   /* newTemp may have resized mce->tmpMap, so 'ent' has to be looked
      up again before it is written through. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   tl_assert(ent->shadowV == IRTemp_INVALID);
   ent->shadowV = created;
   return created;
}
/* Bind a brand-new V-shadow tmp to the original tmp 'orig',
   abandoning whatever shadow was bound before.  This exists because a
   shadow has to be given a new value once it has been tested for
   undefinedness, yet IR's SSA form forbids assigning to the same tmp
   twice; so a replacement tmp is allocated instead.

   Unlike findShadowTmpV, no check is made for an existing shadow: a
   new one is always allocated. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   IRTemp      replacement;
   TempMapEnt* ent;

   /* No explicit bounds check on 'orig': VG_(indexXA) does it. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);

   replacement
      = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );

   /* newTemp may have resized mce->tmpMap; look the entry up again. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   ent->shadowV = replacement;
}
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs ---*/
/*------------------------------------------------------------*/
/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
input, most of this code deals in atoms. Usefully, a value atom
always has a V-value which is also an atom: constants are shadowed
by constants, and temps are shadowed by the corresponding shadow
temporary.
Note that IRAtom is merely another name for IRExpr, so the compiler
does not enforce atom-ness. */
typedef IRExpr IRAtom;
/* (sanity checking only)  Does 'a1' look like an atom from the
   original code?  Constants always qualify; a tmp read qualifies when
   the tmp's entry in mce->tmpMap has kind Orig.  Any other expression
   form does not. */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   switch (a1->tag) {
      case Iex_Const:
         return True;
      case Iex_RdTmp: {
         TempMapEnt* slot = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
         return slot->kind == Orig;
      }
      default:
         return False;
   }
}
/* (sanity checking only)  Does 'a1' look like an atom from shadow
   code?  Constants always qualify; a tmp read qualifies when the
   tmp's entry in mce->tmpMap has kind VSh or BSh.  Any other
   expression form does not. */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   switch (a1->tag) {
      case Iex_Const:
         return True;
      case Iex_RdTmp: {
         TempMapEnt* slot = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
         return slot->kind == VSh || slot->kind == BSh;
      }
      default:
         return False;
   }
}
/* (sanity checking only)  True iff the two expressions are both tmp
   reads or both constants.  Any other combination, including
   non-atom expressions, yields False. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   Bool bothTmps   = a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp;
   Bool bothConsts = a1->tag == Iex_Const && a2->tag == Iex_Const;
   return bothTmps || bothConsts;
}
/*------------------------------------------------------------*/
/*--- Type management ---*/
/*------------------------------------------------------------*/
/* Shadow state is always accessed using integer (or vector) types.
   Given a value type, return the same-sized type used to hold its V
   bits.  The only shadow types produced are I1, I8, I16, I32, I64,
   I128, V128 and V256.  Any type not listed below is printed and
   causes a panic. */
static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      /* Integer and vector types are their own shadow type. */
      case Ity_I1:   case Ity_I8:   case Ity_I16:
      case Ity_I32:  case Ity_I64:  case Ity_I128:
      case Ity_V128: case Ity_V256:
         return ty;
      /* Binary and decimal floating point types map to the integer
         type of the same width. */
      case Ity_F16:
         return Ity_I16;
      case Ity_F32:  case Ity_D32:
         return Ity_I32;
      case Ity_F64:  case Ity_D64:
         return Ity_I64;
      case Ity_F128: case Ity_D128:
         return Ity_I128;
      default:
         ppIRType(ty);
         VG_(tool_panic)("memcheck:shadowTypeV");
   }
}
/* Produce an all-zeroes (that is, fully 'defined') constant
   expression of the given shadow type.  Accepts I1, I8, I16, I32,
   I64, I128, V128 and V256; anything else panics.  The I128 case is
   built from two 64-bit halves via i128_const_zero(). */
static IRExpr* definedOfType ( IRType ty ) {
   IRConst* zero = NULL;
   switch (ty) {
      case Ity_I128:
         return i128_const_zero();
      case Ity_I1:   zero = IRConst_U1(False);        break;
      case Ity_I8:   zero = IRConst_U8(0);            break;
      case Ity_I16:  zero = IRConst_U16(0);           break;
      case Ity_I32:  zero = IRConst_U32(0);           break;
      case Ity_I64:  zero = IRConst_U64(0);           break;
      case Ity_V128: zero = IRConst_V128(0x0000);     break;
      case Ity_V256: zero = IRConst_V256(0x00000000); break;
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
   return IRExpr_Const(zero);
}
/*------------------------------------------------------------*/
/*--- Constructing IR fragments ---*/
/*------------------------------------------------------------*/
/* Append 'st' to the superblock under construction.  When tracing is
   enabled in 'mce', the statement is printed first, prefixed by the
   category character 'cat'. */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   const Bool tracing = mce->trace;
   if (tracing) {
      VG_(printf)(" %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
/* Emit the statement "tmp = expr" into the superblock, via stmt(). */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   IRStmt* write = IRStmt_WrTmp(tmp, expr);
   stmt(cat, mce, write);
}
/* build various kinds of expressions */
/* Shorthands: each expands to a single IRExpr_* constructor call.
triop/binop/unop apply an operator to 3/2/1 argument expressions. */
#define triop(_op, _arg1, _arg2, _arg3) \
IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
/* Constant expressions of the named width. */
#define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
/* An expression that reads the given tmp. */
#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
/* Bind expression 'e' to a newly allocated tmp and return an atom
   reading that tmp; in effect this turns an arbitrary expression
   into an atom.

   'cat' selects the kind of the new tmp: 'V' gives a V-shadow, 'B' a
   B-shadow and 'C' an Orig tmp.  Any other value asserts.

   'ty' must equal the type of 'e'.  It could be deduced from 'e', so
   passing it is redundant; that redundancy is used here as a
   consistency check. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind kind;
   IRTemp   dst;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */

   if (cat == 'V') {
      kind = VSh;
   } else if (cat == 'B') {
      kind = BSh;
   } else if (cat == 'C') {
      /* happens when we are making up new "orig"
         expressions, for IRCAS handling */
      kind = Orig;
   } else {
      tl_assert(0);
   }

   dst = newTemp(mce, ty, kind);
   assign(cat, mce, dst, e);
   return mkexpr(dst);
}
/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops ---*/
/*------------------------------------------------------------*/
/* Build an I128 zero as Iop_64HLto128 applied to one 64-bit zero
   constant, used for both halves. */
static IRExpr *i128_const_zero(void)
{
   IRAtom* zeroHalf = mkU64(0);
   return IRExpr_Binop(Iop_64HLto128, zeroHalf, zeroHalf);
}
/* There are no I128-bit loads and/or stores [as generated by any
current front ends]. So we do not need to worry about that in
expr2vbits_Load */
/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops ---*/
/*------------------------------------------------------------*/
/* --------- Defined-if-either-defined --------- */
/* Defined-if-either-defined, 8 bit.  V bits use 0 for 'defined', so
   ANDing two shadow values leaves a 0 wherever either input has one.
   The result is bound to a new V-shadow tmp. */
static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* anded;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   anded = binop(Iop_And8, a1, a2);
   return assignNew('V', mce, Ity_I8, anded);
}
/* Defined-if-either-defined, 16 bit: And16 of two shadow atoms, bound
   to a new V-shadow tmp. */
static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* anded;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   anded = binop(Iop_And16, a1, a2);
   return assignNew('V', mce, Ity_I16, anded);
}
/* Defined-if-either-defined, 32 bit: And32 of two shadow atoms, bound
   to a new V-shadow tmp. */
static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* anded;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   anded = binop(Iop_And32, a1, a2);
   return assignNew('V', mce, Ity_I32, anded);
}
/* Defined-if-either-defined, 64 bit: And64 of two shadow atoms, bound
   to a new V-shadow tmp. */
static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* anded;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   anded = binop(Iop_And64, a1, a2);
   return assignNew('V', mce, Ity_I64, anded);
}
/* Defined-if-either-defined, V128: AndV128 of two shadow atoms, bound
   to a new V-shadow tmp. */
static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* anded;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   anded = binop(Iop_AndV128, a1, a2);
   return assignNew('V', mce, Ity_V128, anded);
}
/* Defined-if-either-defined, V256: AndV256 of two shadow atoms, bound
   to a new V-shadow tmp. */
static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* anded;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   anded = binop(Iop_AndV256, a1, a2);
   return assignNew('V', mce, Ity_V256, anded);
}
/* --------- Undefined-if-either-undefined --------- */
/* Undefined-if-either-undefined, 8 bit.  V bits use 1 for
   'undefined', so ORing two shadow values leaves a 1 wherever either
   input has one.  The result is bound to a new V-shadow tmp. */
static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* ored;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   ored = binop(Iop_Or8, a1, a2);
   return assignNew('V', mce, Ity_I8, ored);
}
/* Undefined-if-either-undefined, 16 bit: Or16 of two shadow atoms,
   bound to a new V-shadow tmp. */
static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* ored;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   ored = binop(Iop_Or16, a1, a2);
   return assignNew('V', mce, Ity_I16, ored);
}
/* Undefined-if-either-undefined, 32 bit: Or32 of two shadow atoms,
   bound to a new V-shadow tmp. */
static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* ored;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   ored = binop(Iop_Or32, a1, a2);
   return assignNew('V', mce, Ity_I32, ored);
}
/* Undefined-if-either-undefined, 64 bit: Or64 of two shadow atoms,
   bound to a new V-shadow tmp. */
static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* ored;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   ored = binop(Iop_Or64, a1, a2);
   return assignNew('V', mce, Ity_I64, ored);
}
/* Undefined-if-either-undefined, I128.  Each operand is split into
   its 64-bit halves, matching halves are ORed with Or64, and the two
   results are glued back together with Iop_64HLto128. */
static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *lo1, *hi1, *lo2, *hi2, *loOr, *hiOr;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));

   /* Take both operands apart ... */
   lo1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64,   a1));
   hi1 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   lo2 = assignNew('V', mce, Ity_I64, unop(Iop_128to64,   a2));
   hi2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));

   /* ... combine half by half ... */
   loOr = assignNew('V', mce, Ity_I64, binop(Iop_Or64, lo1, lo2));
   hiOr = assignNew('V', mce, Ity_I64, binop(Iop_Or64, hi1, hi2));

   /* ... and reassemble, high half first. */
   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, hiOr, loOr));
}
/* Undefined-if-either-undefined, V128: OrV128 of two shadow atoms,
   bound to a new V-shadow tmp. */
static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* ored;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   ored = binop(Iop_OrV128, a1, a2);
   return assignNew('V', mce, Ity_V128, ored);
}
/* Undefined-if-either-undefined, V256: OrV256 of two shadow atoms,
   bound to a new V-shadow tmp. */
static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRExpr* ored;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   ored = binop(Iop_OrV256, a1, a2);
   return assignNew('V', mce, Ity_V256, ored);
}
/* Undefined-if-either-undefined for shadow type 'vty': forwards to
   the width-specific helper.  Handles I8, I16, I32, I64, I128, V128
   and V256; any other type is printed and causes a panic. */
static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   if (vty == Ity_I8)   return mkUifU8(mce, a1, a2);
   if (vty == Ity_I16)  return mkUifU16(mce, a1, a2);
   if (vty == Ity_I32)  return mkUifU32(mce, a1, a2);
   if (vty == Ity_I64)  return mkUifU64(mce, a1, a2);
   if (vty == Ity_I128) return mkUifU128(mce, a1, a2);
   if (vty == Ity_V128) return mkUifUV128(mce, a1, a2);
   if (vty == Ity_V256) return mkUifUV256(mce, a1, a2);

   /* Unsupported type. */
   VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
   VG_(tool_panic)("memcheck:mkUifU");
}
/* --------- The Left-family of operations. --------- */
/* Apply Iop_Left8 to a shadow atom and bind the result to a new
   V-shadow tmp. */
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   IRExpr* lefted;
   tl_assert(isShadowAtom(mce,a1));
   lefted = unop(Iop_Left8, a1);
   return assignNew('V', mce, Ity_I8, lefted);
}
/* Apply Iop_Left16 to a shadow atom and bind the result to a new
   V-shadow tmp. */
static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   IRExpr* lefted;
   tl_assert(isShadowAtom(mce,a1));
   lefted = unop(Iop_Left16, a1);
   return assignNew('V', mce, Ity_I16, lefted);
}
/* Apply Iop_Left32 to a shadow atom and bind the result to a new
   V-shadow tmp. */
static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   IRExpr* lefted;
   tl_assert(isShadowAtom(mce,a1));
   lefted = unop(Iop_Left32, a1);
   return assignNew('V', mce, Ity_I32, lefted);
}
/* Apply Iop_Left64 to a shadow atom and bind the result to a new
   V-shadow tmp. */
static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   IRExpr* lefted;
   tl_assert(isShadowAtom(mce,a1));
   lefted = unop(Iop_Left64, a1);
   return assignNew('V', mce, Ity_I64, lefted);
}
/* --------- 'Improvement' functions for AND/OR. --------- */
/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
defined (0); all other -> undefined (1).
*/
/* 8-bit improvement term for AND: data OR vbits, bound to a new
   V-shadow tmp.  'data' is an original atom, 'vbits' a shadow atom,
   and both must be of the same kind (both tmps or both constants). */
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRExpr* term;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   term = binop(Iop_Or8, data, vbits);
   return assignNew('V', mce, Ity_I8, term);
}
/* 16-bit improvement term for AND: data OR vbits, bound to a new
   V-shadow tmp.  'data' is an original atom, 'vbits' a shadow atom
   of the same kind. */
static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRExpr* term;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   term = binop(Iop_Or16, data, vbits);
   return assignNew('V', mce, Ity_I16, term);
}
/* 32-bit improvement term for AND: data OR vbits, bound to a new
   V-shadow tmp.  'data' is an original atom, 'vbits' a shadow atom
   of the same kind. */
static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRExpr* term;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   term = binop(Iop_Or32, data, vbits);
   return assignNew('V', mce, Ity_I32, term);
}
/* 64-bit improvement term for AND: data OR vbits, bound to a new
   V-shadow tmp.  'data' is an original atom, 'vbits' a shadow atom
   of the same kind. */
static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRExpr* term;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   term = binop(Iop_Or64, data, vbits);
   return assignNew('V', mce, Ity_I64, term);
}
/* V128 improvement term for AND: data OR vbits, bound to a new
   V-shadow tmp.  'data' is an original atom, 'vbits' a shadow atom
   of the same kind. */
static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRExpr* term;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   term = binop(Iop_OrV128, data, vbits);
   return assignNew('V', mce, Ity_V128, term);
}
/* V256 improvement term for AND: data OR vbits, bound to a new
   V-shadow tmp.  'data' is an original atom, 'vbits' a shadow atom
   of the same kind. */
static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRExpr* term;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   term = binop(Iop_OrV256, data, vbits);
   return assignNew('V', mce, Ity_V256, term);
}
/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
defined (0); all other -> undefined (1).
*/
/* 8-bit improvement term for OR: (~data) OR vbits.  Two statements
   are emitted, the complement of 'data' first and then the OR, each
   bound to a new V-shadow tmp.  'data' is an original atom, 'vbits'
   a shadow atom of the same kind. */
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRAtom* notData;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   notData = assignNew('V', mce, Ity_I8, unop(Iop_Not8, data));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, notData, vbits));
}
/* 16-bit improvement term for OR: (~data) OR vbits.  Emits the
   complement of 'data', then the OR, each bound to a new V-shadow
   tmp. */
static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRAtom* notData;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   notData = assignNew('V', mce, Ity_I16, unop(Iop_Not16, data));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, notData, vbits));
}
/* 32-bit improvement term for OR: (~data) OR vbits.  Emits the
   complement of 'data', then the OR, each bound to a new V-shadow
   tmp. */
static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRAtom* notData;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   notData = assignNew('V', mce, Ity_I32, unop(Iop_Not32, data));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, notData, vbits));
}
/* 64-bit improvement term for OR: (~data) OR vbits.  Emits the
   complement of 'data', then the OR, each bound to a new V-shadow
   tmp. */
static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRAtom* notData;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   notData = assignNew('V', mce, Ity_I64, unop(Iop_Not64, data));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, notData, vbits));
}
/* V128 improvement term for OR: (~data) OR vbits.  Emits the
   complement of 'data', then the OR, each bound to a new V-shadow
   tmp. */
static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRAtom* notData;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   notData = assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, notData, vbits));
}
/* V256 improvement term for OR: (~data) OR vbits.  Emits the
   complement of 'data', then the OR, each bound to a new V-shadow
   tmp. */
static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   IRAtom* notData;
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   notData = assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, notData, vbits));
}
/* --------- Pessimising casts. --------- */
/* Pessimising cast.  Returns an atom of (shadow) type 'dst_ty' whose
   bits are all 1 if any bit of 'vbits' is undefined (== 1), and all 0
   otherwise.  'vbits' must be a shadow atom; its type is looked up in
   mce->sb->tyenv.

   A handful of common source/destination pairings get dedicated short
   sequences.  Everything else takes the general route: collapse
   'vbits' to a single I1, then widen that bit to 'dst_ty' with the
   Iop_1Sto* ops.  Unsupported source or destination types are printed
   and cause a panic. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* ---- Fast paths ---- */

   if (src_ty == Ity_I32
       && (dst_ty == Ity_I32 || dst_ty == Ity_I64
           || dst_ty == Ity_V128 || dst_ty == Ity_V256)) {
      /* PCast within 32 bits, then keep doubling the value up until
         the requested width is reached. */
      IRAtom* w = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      if (dst_ty == Ity_I32)
         return w;
      w = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, w, w));
      if (dst_ty == Ity_I64)
         return w;
      w = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, w, w));
      if (dst_ty == Ity_V128)
         return w;
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, w, w));
   }

   if (src_ty == Ity_I64 && (dst_ty == Ity_I64 || dst_ty == Ity_I32)) {
      /* PCast within 64 bits.  This gives all 0s or all 1s, so for a
         32-bit result the top half can simply be thrown away. */
      IRAtom* w = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      if (dst_ty == Ity_I64)
         return w;
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, w));
   }

   if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
      /* Copy the top half of the vector over the bottom half with
         InterleaveHI64x2, UifU that with the original so the low
         lane holds UifU(vbits[127:64], vbits[63:0]), extract the low
         lane and PCast it within 64 bits. */
      IRAtom* hiTwice
         = assignNew('V', mce, Ity_V128,
                     binop(Iop_InterleaveHI64x2, vbits, vbits));
      IRAtom* merged
         = mkUifUV128(mce, hiTwice, vbits);
      IRAtom* lowLane
         = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, merged));
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lowLane));
   }

   /* ---- General route, step 1: collapse to a single bit ---- */

   tmp1 = NULL;
   if (src_ty == Ity_I1) {
      /* Already a single bit. */
      tmp1 = vbits;
   }
   else if (src_ty == Ity_I128) {
      /* Chop it in half, OR the halves together, and compare that
         with zero. */
      IRAtom* hi   = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
      IRAtom* lo   = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
      IRAtom* both = assignNew('V', mce, Ity_I64, binop(Iop_Or64, hi, lo));
      tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, both));
   }
   else {
      IROp isNonZero;
      switch (src_ty) {
         case Ity_I8:  isNonZero = Iop_CmpNEZ8;  break;
         case Ity_I16: isNonZero = Iop_CmpNEZ16; break;
         case Ity_I32: isNonZero = Iop_CmpNEZ32; break;
         case Ity_I64: isNonZero = Iop_CmpNEZ64; break;
         default:
            ppIRType(src_ty);
            VG_(tool_panic)("mkPCastTo(1)");
      }
      tmp1 = assignNew('V', mce, Ity_I1, unop(isNonZero, vbits));
   }
   tl_assert(tmp1);

   /* ---- General route, step 2: widen to the destination type ---- */

   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8,  unop(Iop_1Sto8,  tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_I128: {
         IRAtom* w64 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, w64, w64));
      }
      case Ity_V128:
      case Ity_V256: {
         IRAtom* wide = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         wide = assignNew('V', mce, Ity_V128,
                          binop(Iop_64HLtoV128, wide, wide));
         if (dst_ty == Ity_V128)
            return wide;
         return assignNew('V', mce, Ity_V256,
                          binop(Iop_V128HLtoV256, wide, wide));
      }
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
/* A minor variant of mkPCastTo.  Takes a shadow value 'varg' of type
   'ty' and returns a value of the same type in which every bit is
   Defined (zero) except the least significant one, which is a PCast
   of the whole argument down to a single bit.  Only Ity_V128 and
   Ity_I64 are supported; any other 'ty' asserts. */
static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
{
   IRAtom* smeared;
   IRAtom* lsbOnly;

   if (ty != Ity_V128 && ty != Ity_I64) {
      /*NOTREACHED*/
      tl_assert(0);
   }

   /* Both supported cases start the same way: PCast the argument to
      64 bits, then zero (make Defined) the top 63 bits. */
   smeared = mkPCastTo(mce, Ity_I64, varg);
   lsbOnly = assignNew('V', mce, Ity_I64,
                       binop(Iop_And64, smeared, mkU64(1)));
   if (ty == Ity_I64)
      return lsbOnly;

   /* V128: place 64 further Defined bits above that, giving
      Def--(127)--Def PCast-to-I1(varg). */
   return assignNew('V', mce, Ity_V128,
                    binop(Iop_64HLtoV128, definedOfType(Ity_I64), lsbOnly));
}
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

      PCastTo<1> (
         -- naive version
         PCastTo<sz>( UifU<sz>(vxx, vyy) )

         `DifD<sz>`

         -- improvement term
         PCastTo<sz>( CmpEQ<sz> ( vec, 1...1 ) )
      )

   where
      vec contains 0 (defined) bits where the corresponding arg bits
      are defined but different, and 1 bits otherwise.

      vec = Or<sz>( vxx,   // 0 iff bit defined
                    vyy,   // 0 iff bit defined
                    Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                  )

      If any bit of vec is 0, the result is defined and so the
      improvement term should produce 0...0, else it should produce
      1...1.

      Hence require for the improvement term:

         if vec == 1...1 then 1...1 else 0...0
      ->
         PCastTo<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.

   Parameters: 'ty' is the operand type and must be Ity_I16, Ity_I32
   or Ity_I64 (anything else panics).  'xx'/'yy' are the original
   operand atoms and 'vxx'/'vyy' their shadow atoms; each shadow must
   be of the same kind (tmp or constant) as its original.  Returns a
   shadow atom of type Ity_I1.

   The pieces of 'vec' are bound one statement at a time, rather than
   as nested call arguments, so that the order in which the shadow
   statements are emitted (and hence the numbering of the new tmps)
   does not depend on the compiler's choice of argument evaluation
   order.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
                                    IRType ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IRAtom *eitherUndef, *bitsDiffer, *bitsSame;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-specific operators and the all-ones constant. */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* Naive version: PCast of the UifU of the two shadows. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec: a bit is 0 only where both operand bits are defined and
      differ.  Built in an explicit, fixed order. */
   eitherUndef = assignNew('V', mce, ty, binop(opOR, vxx, vyy));
   bitsDiffer  = assignNew('V', mce, ty, binop(opXOR, xx, yy));
   bitsSame    = assignNew('V', mce, ty, unop(opNOT, bitsDiffer));
   vec         = assignNew('V', mce, ty,
                           binop(opOR, eitherUndef, bitsSame));

   /* All-ones iff vec is all-ones, i.e. no such bit pair exists. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result, then collapse to a
      single bit. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
/* --------- Semi-accurate interpretation of CmpORD. --------- */
/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
CmpORD32S(x,y) = 1<<3 if x <s y
= 1<<2 if x >s y
= 1<<1 if x == y
and similarly the unsigned variant. The default interpretation is:
CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
& (7<<1)
The "& (7<<1)" reflects the fact that all result bits except 3,2,1
are zero and therefore defined (viz, zero).
Also deal with a special case better:
CmpORD32S(x,0)
Here, bit 3 (LT) of the result is a copy of the top bit of x and
will be defined even if the rest of x isn't. In which case we do:
CmpORD32S#(x,x#,0,{impliedly 0}#)
= PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
| (x# >>u 31) << 3 -- LT# = x#[31]
Analogous handling for CmpORD64{S,U}.
*/
/* True iff |e| is a constant expression carrying an Ico_U32 whose
   value is zero. */
static Bool isZeroU32 ( IRAtom* e )
{
   Bool is_u32_const = toBool( e->tag == Iex_Const
                               && e->Iex.Const.con->tag == Ico_U32 );
   return toBool( is_u32_const && e->Iex.Const.con->Ico.U32 == 0 );
}
/* True iff |e| is a constant expression carrying an Ico_U64 whose
   value is zero. */
static Bool isZeroU64 ( IRAtom* e )
{
   Bool is_u64_const = toBool( e->tag == Iex_Const
                               && e->Iex.Const.con->tag == Ico_U64 );
   return toBool( is_u64_const && e->Iex.Const.con->Ico.U64 == 0 );
}
/* Build the shadow expression for CmpORD{32,64}{S,U}(xx,yy).
   |xxhash| and |yyhash| are the shadows of |xx| and |yy|.

   Two interpretations are produced:
   - standard:  PCast(xx# `UifU` yy#) & (7<<1)
   - fancy, used only for a signed compare against a literal zero:
        (PCast(xx#) & (3<<1)) | ((xx# >>u (width-1)) << 3)

   Note that the returned expression is the outermost binop itself;
   it is not bound to a temporary here. */
static IRAtom* doCmpORD ( MCEnv* mce,
                          IROp cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   is64      = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   is_signed = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   or_op     = is64 ? Iop_Or64  : Iop_Or32;
   IROp   and_op    = is64 ? Iop_And64 : Iop_And32;
   IROp   shl_op    = is64 ? Iop_Shl64 : Iop_Shl32;
   IROp   shr_op    = is64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty        = is64 ? Ity_I64   : Ity_I32;
   Int    nbits     = is64 ? 64 : 32;
   Bool   (*is_zero_const)(IRAtom*) = is64 ? isZeroU64 : isZeroU32;
   IRAtom* result_mask = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   /* Debug printing, normally disabled. */
   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (!is_signed || !is_zero_const(yy)) {
      /* Standard interpretation: only bits 3,2,1 can be undefined. */
      result_mask = is64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            and_op,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            result_mask
         );
   }

   /* Fancy interpretation.  yy is a literal zero, so its shadow must
      be fully defined (zero#). */
   tl_assert(is_zero_const(yyhash));
   result_mask = is64 ? mkU64(3<<1) : mkU32(3<<1);
   return
      binop(
         or_op,
         /* GT# and EQ#: PCast(xx#) restricted to bits 2 and 1 */
         assignNew(
            'V', mce,ty,
            binop(
               and_op,
               mkPCastTo(mce,ty, xxhash),
               result_mask
            )),
         /* LT#: top bit of xx#, moved to bit 3 */
         assignNew(
            'V', mce,ty,
            binop(
               shl_op,
               assignNew(
                  'V', mce,ty,
                  binop(shr_op, xxhash, mkU8(nbits-1))),
               mkU8(3)
            ))
      );
}
/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/
/* Forward declaration.  complainIfUndefined (below) calls schemeE on
   the atom being checked to obtain its origin value when
   MC_(clo_mc_level) == 3. */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
/* Annotate the dirty helper call |di| with two guest-state effects:
   a read of the stack pointer and a read of the instruction pointer,
   using the offsets and sizes recorded in mce->layout.  All of the
   'emit-a-complaint' style helpers we might call are marked this
   way, since they might read both registers. */
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   /* Effect 0: the stack pointer is read. */
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;

   /* Effect 1: the instruction pointer is read. */
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;

   di->nFxState = 2;
}
/* Check the supplied *original* |atom| for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness.

   The checks are performed, any resulting complaint emitted, and
   |atom|'s shadow temp set to 'defined', ONLY in the case that
   |guard| evaluates to True at run-time.  If it evaluates to False
   then no action is performed.  If |guard| is NULL (the usual case)
   then it is assumed to be always-true, and hence these actions are
   performed unconditionally.

   This routine does not generate code to check the definedness of
   |guard|.  The caller is assumed to have taken care of that already.

   Outline of what is generated:
   1. cond = PCast-to-I1 of |atom|'s V bits.
   2. A dirty call to a size-specific failure helper, guarded by cond
      (ANDed with |guard| when one is supplied).
   3. If |atom| is a tmp, a new shadow tmp for it holding 'defined'
      (or, under a guard, an ITE between 'defined' and the old shadow).

   Nothing at all is generated when MC_(clo_mc_level) == 1.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom* vatom;
   IRType ty;
   Int sz;
   IRDirty* di;
   IRAtom* cond;
   IRAtom* origin;
   void* fn;
   const HChar* nm;
   IRExpr** args;
   Int nargs;
   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;
   if (guard)
      tl_assert(isOriginalAtom(mce, guard));
   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));
   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   /* sz is only used for constructing the error message.  Ity_I1 is
      represented as size 0; everything else by its size in bytes. */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */
   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      /* On a 64-bit host word the origin is zero-extended to I64
         before being passed to the helper. */
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }
   fn = NULL;
   nm = NULL;
   args = NULL;
   nargs = -1;
   /* Select the failure helper, its printable name and its argument
      vector.  Sizes 0, 1, 4 and 8 have dedicated helpers; sizes 2 and
      16 share the generic 'N' helper, which additionally receives sz.
      Each helper comes in a with-origin (_w_o) and a without-origin
      (_no_o) flavour. */
   switch (sz) {
      case 0:
         if (origin) {
            fn = &MC_(helperc_value_check0_fail_w_o);
            nm = "MC_(helperc_value_check0_fail_w_o)";
            args = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn = &MC_(helperc_value_check0_fail_no_o);
            nm = "MC_(helperc_value_check0_fail_no_o)";
            args = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn = &MC_(helperc_value_check1_fail_w_o);
            nm = "MC_(helperc_value_check1_fail_w_o)";
            args = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn = &MC_(helperc_value_check1_fail_no_o);
            nm = "MC_(helperc_value_check1_fail_no_o)";
            args = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn = &MC_(helperc_value_check4_fail_w_o);
            nm = "MC_(helperc_value_check4_fail_w_o)";
            args = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn = &MC_(helperc_value_check4_fail_no_o);
            nm = "MC_(helperc_value_check4_fail_no_o)";
            args = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn = &MC_(helperc_value_check8_fail_w_o);
            nm = "MC_(helperc_value_check8_fail_w_o)";
            args = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn = &MC_(helperc_value_check8_fail_no_o);
            nm = "MC_(helperc_value_check8_fail_no_o)";
            args = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn = &MC_(helperc_value_checkN_fail_w_o);
            nm = "MC_(helperc_value_checkN_fail_w_o)";
            args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn = &MC_(helperc_value_checkN_fail_no_o);
            nm = "MC_(helperc_value_checkN_fail_no_o)";
            args = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }
   /* Sanity: a helper was chosen, and the presence of an origin
      matches the checking level (3 => origin, 2 => none). */
   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );
   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)
   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call.  Both I1
      values are widened to I32, ANDed there, and the result is
      narrowed back to I1. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   }
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));
   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* vatom and atom are same-kinded (asserted earlier), so a RdTmp
      shadow implies a RdTmp original. */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value .. (must be fetched before newShadowTmpV
         // is called, since findShadowTmpV is used again afterwards
         // to name the replacement)
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                   mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
/*------------------------------------------------------------*/
/* Decide whether the guest-state slice (offset,size) lies inside one
   of the always-defined sections declared in mce->layout.

   Returns True if the slice is completely contained in such a
   section and False if it overlaps none of them.  It is an error for
   the slice to partially overlap a section; that case panics.  All
   offsets involved are asserted to fit in 16 bits.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int k;
   Int lo = offset;
   Int hi = lo + size - 1;

   tl_assert((lo & ~0xFFFF) == 0);
   tl_assert((hi & ~0xFFFF) == 0);

   for (k = 0; k < mce->layout->n_alwaysDefd; k++) {
      Int sect_lo = mce->layout->alwaysDefd[k].offset;
      Int sect_hi = sect_lo + mce->layout->alwaysDefd[k].size - 1;

      tl_assert((sect_lo & ~0xFFFF) == 0);
      tl_assert((sect_hi & ~0xFFFF) == 0);

      if (hi >= sect_lo && sect_hi >= lo) {
         /* The slice touches this section: it must lie wholly
            inside it. */
         if (lo >= sect_lo && hi <= sect_hi)
            return True;
         VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
      }
      /* otherwise: no overlap with this section, try the next */
   }

   return False; /* could not find any containing section */
}
/* Emit the shadow actions for a Put to guest state at |offset|.

   Exactly one of |atom| (an original atom, whose V bits are then
   computed here) and |vatom| (ready-made V bits) must be non-NULL.
   If the target slice is marked 'always defined' nothing is emitted;
   otherwise the V bits are written to the shadow state, which lives
   at offset + mce->layout->total_sizeB.

   When |guard| is non-NULL the write is conditional: if the guard is
   false at run time, the value already in the shadow slot is written
   back instead.  The definedness of |guard| itself is assumed to
   have been checked by the caller.

   Nothing is emitted when MC_(clo_mc_level) == 1; leaving the shadow
   PUTs out lets Vex's optimiser remove all the shadow computation
   that feeds them, including GETs of the shadow registers.
*/
static
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;
   Int    shadow_offset;

   if (MC_(clo_mc_level) == 1)
      return;

   if (!atom) {
      /* Caller supplied the V bits directly. */
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   } else {
      /* Caller supplied the original; derive its V bits. */
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);

   /* Always-defined slice: emit nothing.  (The original code carries
      a commented-out complainIfUndefined(mce, atom) call at this
      point, marked "later: no ...".) */
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty)))
      return;

   /* Do a plain shadow Put. */
   shadow_offset = offset + mce->layout->total_sizeB;
   if (guard) {
      /* If the guard expression evaluates to false we simply Put the
         value that is already stored in the guest state slot. */
      IRAtom* guard_tmp = assignNew('V', mce, Ity_I1, guard);
      IRAtom* unchanged = assignNew('V', mce, ty,
                                    IRExpr_Get(shadow_offset, ty));
      vatom = assignNew('V', mce, ty,
                        IRExpr_ITE(guard_tmp, vatom, unchanged));
   }
   stmt( 'V', mce, IRStmt_Put( shadow_offset, vatom ));
}
/* Emit the shadow actions for an indexed guest-state write (PutI),
   described by |puti| (array descriptor, index atom, bias and data
   atom).  This function returns nothing; its effect is the
   statements it adds.

   The index atom is checked for definedness (a complaint is emitted
   if it is undefined).  Then, unless the whole array is marked
   'always defined', a PutI of the data's V bits is emitted against a
   cloned descriptor that refers to the shadow area, i.e. the same
   array shifted up by mce->layout->total_sizeB and with the shadow
   element type.

   Nothing is emitted when MC_(clo_mc_level) == 1.
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom* vatom;
   IRType ty, tyS;
   Int arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom* ix = puti->ix;
   Int bias = puti->bias;
   IRAtom* atom = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty = descr->elemTy;
   tyS = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));

   /* An undefined index is reported, unconditionally. */
   complainIfUndefined(mce, ix, NULL);

   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}
/* Build an expression holding the V bits for a GET of type |ty| at
   guest-state |offset|.

   For a slice marked 'always defined' this is the all-defined
   constant of the shadow type.  Otherwise it is a Get of the shadow
   type from the shadow area, at offset + mce->layout->total_sizeB.
   |ty| must not be Ity_I1 or Ity_I128.
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);

   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);

   if (!isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Not always-defined: return a cloned version of the Get that
         refers to the shadow area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }

   /* Always defined, return all zeroes of the relevant type */
   return definedOfType(tyS);
}
/* Build an expression holding the V bits for a GETI, given in pieces:
   array descriptor |descr|, index atom |ix| and |bias|.

   The index is first checked for definedness (a complaint is emitted
   if it is undefined).  If the whole array is marked 'always
   defined' the result is the all-defined constant of the shadow
   element type; otherwise it is a GetI on a cloned descriptor that
   refers to the shadow area (base shifted by
   mce->layout->total_sizeB, shadow element type).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty      = descr->elemTy;
   IRType tyS     = shadowTypeV(ty);
   Int    arrSize = descr->nElems * sizeofIRType(ty);
   IRRegArray* shadow_descr;

   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);

   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   }

   /* Otherwise read from the corresponding array in the shadow area. */
   shadow_descr
      = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
   return IRExpr_GetI( shadow_descr, ix, bias );
}
/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations, ---*/
/*--- using lazy-propagate semantics ---*/
/*------------------------------------------------------------*/
/* Lazily propagate undefinedness from two shadow values |va1| and
   |va2| into a result of shadow type |finalVty|.

   PCast is an expensive operation, so the I64 x I64 cases are handled
   specially with a single PCast.  Everything else takes the general
   route: PCast each argument to I32, UifU the two, and PCast the
   merge to |finalVty|.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRType  t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType  t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRAtom* merged;
   IRAtom* squashed1;
   IRAtom* squashed2;

   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* Special cases: both arguments already I64, so merge at I64 and
      PCast just once. */
   if (t1 == Ity_I64 && t2 == Ity_I64) {

      /* I64 x I64 -> I64 */
      if (finalVty == Ity_I64) {
         if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
         merged = mkUifU(mce, Ity_I64, va1, va2);
         return mkPCastTo(mce, Ity_I64, merged);
      }

      /* I64 x I64 -> I32 */
      if (finalVty == Ity_I32) {
         if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
         merged = mkUifU(mce, Ity_I64, va1, va2);
         return mkPCastTo(mce, Ity_I32, merged);
      }
   }

   /* Debug printing of the type signature, normally disabled. */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   squashed1 = mkPCastTo(mce, Ity_I32, va1);
   squashed2 = mkPCastTo(mce, Ity_I32, va2);
   merged    = mkUifU(mce, Ity_I32, squashed1, squashed2);
   return mkPCastTo(mce, finalVty, merged);
}
/* Three-argument lazy propagation: merge the undefinedness of the
   shadow values |va1|, |va2|, |va3| into a result of shadow type
   |finalVty|.

   Only a fixed set of type signatures is supported, all with an I32
   first argument:

      I32 x I64  x I64  -> I64
      I32 x I8   x I64  -> I64
      I32 x I64  x I64  -> I32
      I32 x I32  x I32  -> I32
      I32 x I128 x I128 -> I128
      I32 x I8   x I128 -> I128

   Any other signature is printed and then asserts.  The cases are
   hand-written so as to PCast as little as possible, since PCast is
   an expensive operation.
*/
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* acc;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);

   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   if (t1 == Ity_I32) {

      /* I32 x I64 x I64 -> I64 */
      /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
      if (t2 == Ity_I64 && t3 == Ity_I64 && finalVty == Ity_I64) {
         if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
         /* Widen 1st arg to I64.  Since 1st arg is typically a
            rounding mode indication which is fully defined, this
            should get folded out later. */
         acc = mkPCastTo(mce, Ity_I64, va1);
         /* Fold in the 2nd and 3rd args, then PCast once more. */
         acc = mkUifU(mce, Ity_I64, acc, va2);
         acc = mkUifU(mce, Ity_I64, acc, va3);
         return mkPCastTo(mce, Ity_I64, acc);
      }

      /* I32 x I8 x I64 -> I64 */
      if (t2 == Ity_I8 && t3 == Ity_I64 && finalVty == Ity_I64) {
         IRAtom* wide1;
         IRAtom* wide2;
         if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
         /* Widen 1st and 2nd args to I64.  Since 1st arg is typically
            a rounding mode indication which is fully defined, this
            should get folded out later. */
         wide1 = mkPCastTo(mce, Ity_I64, va1);
         wide2 = mkPCastTo(mce, Ity_I64, va2);
         acc = mkUifU(mce, Ity_I64, wide1, wide2); // UifU(PCast(va1), PCast(va2))
         acc = mkUifU(mce, Ity_I64, acc, va3);
         /* and PCast once again. */
         return mkPCastTo(mce, Ity_I64, acc);
      }

      /* I32 x I64 x I64 -> I32 */
      if (t2 == Ity_I64 && t3 == Ity_I64 && finalVty == Ity_I32) {
         if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
         acc = mkPCastTo(mce, Ity_I64, va1);
         acc = mkUifU(mce, Ity_I64, acc, va2);
         acc = mkUifU(mce, Ity_I64, acc, va3);
         return mkPCastTo(mce, Ity_I32, acc);
      }

      /* I32 x I32 x I32 -> I32 */
      /* 32-bit FP idiom, as (eg) happens on ARM */
      if (t2 == Ity_I32 && t3 == Ity_I32 && finalVty == Ity_I32) {
         if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
         /* All three already have the merge type; no widening. */
         acc = mkUifU(mce, Ity_I32, va1, va2);
         acc = mkUifU(mce, Ity_I32, acc, va3);
         return mkPCastTo(mce, Ity_I32, acc);
      }

      /* I32 x I128 x I128 -> I128 */
      /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
      if (t2 == Ity_I128 && t3 == Ity_I128 && finalVty == Ity_I128) {
         if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
         /* Widen 1st arg to I128.  Since 1st arg is typically a
            rounding mode indication which is fully defined, this
            should get folded out later. */
         acc = mkPCastTo(mce, Ity_I128, va1);
         /* Fold in the 2nd and 3rd args, then PCast once more. */
         acc = mkUifU(mce, Ity_I128, acc, va2);
         acc = mkUifU(mce, Ity_I128, acc, va3);
         return mkPCastTo(mce, Ity_I128, acc);
      }

      /* I32 x I8 x I128 -> I128 */
      if (t2 == Ity_I8 && t3 == Ity_I128 && finalVty == Ity_I128) {
         IRAtom* mid1;
         IRAtom* mid2;
         IRAtom* mid3;
         if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
         /* Use I64 as an intermediate type, which means PCasting all 3
            args to I64 to start with.  1st arg is typically a rounding
            mode indication which is fully defined, so we hope that it
            will get folded out later. */
         mid1 = mkPCastTo(mce, Ity_I64, va1);
         mid2 = mkPCastTo(mce, Ity_I64, va2);
         mid3 = mkPCastTo(mce, Ity_I64, va3);
         /* Now UifU all three together. */
         acc = mkUifU(mce, Ity_I64, mid1, mid2); // UifU(PCast(va1), PCast(va2))
         acc = mkUifU(mce, Ity_I64, acc, mid3);  // ... `UifU` PCast(va3)
         /* and PCast once again. */
         return mkPCastTo(mce, Ity_I128, acc);
      }
   }

   /* Unsupported signature: say which one it was, then give up. */
   VG_(printf)("mkLazy3: ");
   ppIRType(t1);
   VG_(printf)(" x ");
   ppIRType(t2);
   VG_(printf)(" x ");
   ppIRType(t3);
   VG_(printf)(" -> ");
   ppIRType(finalVty);
   VG_(printf)("\n");

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
/* Four-argument lazy propagation: merge the undefinedness of the
   shadow values |va1| .. |va4| into a result of shadow type
   |finalVty|.

   Only two type signatures are supported:

      I32 x I64 x I64 x I64 -> I64
      I32 x I32 x I32 x I32 -> I32

   Any other signature is printed and then asserts.  Both supported
   cases avoid PCasting each argument individually, since PCast is an
   expensive operation.
*/
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* acc;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);

   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   if (t1 == Ity_I32) {

      /* I32 x I64 x I64 x I64 -> I64 */
      /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
      if (t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
          && finalVty == Ity_I64) {
         if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
         /* Widen 1st arg to I64.  Since 1st arg is typically a
            rounding mode indication which is fully defined, this
            should get folded out later. */
         acc = mkPCastTo(mce, Ity_I64, va1);
         /* Fold in the 2nd, 3rd and 4th args, then PCast once more. */
         acc = mkUifU(mce, Ity_I64, acc, va2);
         acc = mkUifU(mce, Ity_I64, acc, va3);
         acc = mkUifU(mce, Ity_I64, acc, va4);
         return mkPCastTo(mce, Ity_I64, acc);
      }

      /* I32 x I32 x I32 x I32 -> I32 */
      /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
      if (t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
          && finalVty == Ity_I32) {
         if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
         /* All four already have the merge type; no widening. */
         acc = mkUifU(mce, Ity_I32, va1, va2);
         acc = mkUifU(mce, Ity_I32, acc, va3);
         acc = mkUifU(mce, Ity_I32, acc, va4);
         return mkPCastTo(mce, Ity_I32, acc);
      }
   }

   /* Unsupported signature: say which one it was, then give up. */
   VG_(printf)("mkLazy4: ");
   ppIRType(t1);
   VG_(printf)(" x ");
   ppIRType(t2);
   VG_(printf)(" x ");
   ppIRType(t3);
   VG_(printf)(" x ");
   ppIRType(t4);
   VG_(printf)(" -> ");
   ppIRType(finalVty);
   VG_(printf)("\n");

   tl_assert(0);
}
/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).

   mce        - instrumentation environment
   exprvec    - NULL-terminated vector of *original* atoms; at most 32
                entries are accepted (asserted)
   finalVtype - shadow type of the value returned
   cee        - callee descriptor; bit i of cee->mcx_mask set means
                argument i is excluded from definedness checking

   Returns the PCast to |finalVtype| of the UifU of the (PCasted) V
   bits of every non-excluded argument.  The merge is done at I64 if
   every non-excluded argument has type I64, and at I32 otherwise.
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr;
   IRType mergeTy;
   Bool mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* The mask bit is formed with an unsigned 1: i may be 31, and
         shifting a signed 1 left by 31 is undefined behaviour. */
      if (cee->mcx_mask & (1U << i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
   curr = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1U << i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }

   return mkPCastTo(mce, finalVtype, curr );
}
/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations. ---*/
/*------------------------------------------------------------*/
/* Build the expensive (carry-chain aware) shadow for aa + bb (when
   |add| is True) or aa - bb (when |add| is False), at type |ty|,
   which must be Ity_I32 or Ity_I64 (anything else panics).  |qaa|
   and |qbb| are the shadows of |aa| and |bb|.

   With
      a_min = aa & ~qaa      (every undefined bit of aa cleared)
      a_max = aa | qaa       (every undefined bit of aa set)
   and b_min, b_max likewise, the result is

      add:  (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      sub:  (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
*/
static
IRAtom* expensiveAddSub ( MCEnv* mce,
                          Bool add,
                          IRType ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa, IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRAtom *partner_of_a_min, *partner_of_a_max;
   IROp   and_op, or_op, xor_op, not_op, arith_op;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         and_op   = Iop_And32;
         or_op    = Iop_Or32;
         xor_op   = Iop_Xor32;
         not_op   = Iop_Not32;
         arith_op = add ? Iop_Add32 : Iop_Sub32;
         break;
      case Ity_I64:
         and_op   = Iop_And64;
         or_op    = Iop_Or64;
         xor_op   = Iop_Xor64;
         not_op   = Iop_Not64;
         arith_op = add ? Iop_Add64 : Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(and_op, aa,
                           assignNew('V', mce,ty, unop(not_op, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(and_op, bb,
                           assignNew('V', mce,ty, unop(not_op, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(or_op, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(or_op, bb, qbb));

   /* For addition the extremes pair up min-with-min and max-with-max;
      for subtraction they pair up crosswise. */
   if (add) {
      partner_of_a_min = b_min;
      partner_of_a_max = b_max;
   } else {
      partner_of_a_min = b_max;
      partner_of_a_max = b_min;
   }

   // result = (qaa | qbb)
   //          | ((a_min OP partner_of_a_min) ^ (a_max OP partner_of_a_max))
   return
      assignNew('V', mce,ty,
         binop( or_op,
                assignNew('V', mce,ty, binop(or_op, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( xor_op,
                          assignNew('V', mce,ty,
                                    binop(arith_op, a_min, partner_of_a_min)),
                          assignNew('V', mce,ty,
                                    binop(arith_op, a_max, partner_of_a_max))
                   )
                )
         )
      );
}
/* Build an improved shadow for Ctz32/Ctz64 of |atom|, whose shadow is
   |vatom|.  |czop| selects the width and must be Iop_Ctz32 or
   Iop_Ctz64; any other op is printed and then panics.

   The V bits are first masked with  atom ^ (atom - 1), and the masked
   value is then pessimistically cast at the operand width.
*/
static
IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
                                       IRAtom* atom, IRAtom* vatom )
{
   IRType  ty;
   IROp    op_xor, op_sub, op_and;
   IRExpr* const_one;
   IRAtom* atom_minus_1;
   IRAtom* low_mask;
   IRAtom* masked_v;

   tl_assert(isShadowAtom(mce,vatom));
   tl_assert(isOriginalAtom(mce,atom));
   tl_assert(sameKindedAtoms(atom,vatom));

   switch (czop) {
      case Iop_Ctz32:
         ty        = Ity_I32;
         op_xor    = Iop_Xor32;
         op_sub    = Iop_Sub32;
         op_and    = Iop_And32;
         const_one = mkU32(1);
         break;
      case Iop_Ctz64:
         ty        = Ity_I64;
         op_xor    = Iop_Xor64;
         op_sub    = Iop_Sub64;
         op_and    = Iop_And64;
         const_one = mkU64(1);
         break;
      default:
         ppIROp(czop);
         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
   }

   // low_mask = atom ^ (atom - 1)
   //
   // That is, low_mask has its low ctz(atom) bits equal to one;
   // higher bits (if any) equal to zero.
   atom_minus_1 = assignNew('V', mce, ty, binop(op_sub, atom, const_one));
   low_mask     = assignNew('V', mce, ty, binop(op_xor, atom, atom_minus_1));

   // masked_v = vatom & low_mask
   //
   // That is, treat any V bits above the first ctz(atom) bits as
   // "defined".
   masked_v = assignNew('V', mce, ty, binop(op_and, vatom, low_mask));

   // Return pessimizing cast of the masked V bits.
   return mkPCastTo(mce, ty, masked_v);
}
/*------------------------------------------------------------*/
/*--- Scalar shifts. ---*/
/*------------------------------------------------------------*/
/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.

   |original_op| is the shift being shadowed and |ty| the type of its
   result; |qaa|/|qbb| are the shadows of |aa|/|bb|.
*/
static IRAtom* scalarShift ( MCEnv* mce,
                             IRType ty,
                             IROp original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa, IRAtom* bb )
{
   IRAtom* shifted_or_undef;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* (qaa `original_op` bb) `UifU` PCast(qbb) */
   shifted_or_undef
      = mkUifU( mce, ty,
                assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                mkPCastTo(mce, ty, qbb)
        );

   return assignNew( 'V', mce, ty, shifted_or_undef );
}
/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops. ---*/
/*------------------------------------------------------------*/
/* Vector pessimisation -- pessimise within each lane individually.
   Each helper below applies the lane-wise CmpNEZ op of the matching
   shape to the shadow value |at| and returns the atom produced by
   assignNew for it. */

/* 8x16 lanes, V128. */
static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 16x8 lanes, V128: applies
   Iop_CmpNEZ16x8 to |at|. */
static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 32x4 lanes, V128: applies
   Iop_CmpNEZ32x4 to |at|. */
static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 64x2 lanes, V128: applies
   Iop_CmpNEZ64x2 to |at|. */
static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 64x4 lanes, V256: applies
   Iop_CmpNEZ64x4 to |at|. */
static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 32x8 lanes, V256: applies
   Iop_CmpNEZ32x8 to |at|. */
static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 32x2 lanes held in an I64: applies
   Iop_CmpNEZ32x2 to |at|. */
static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 16x16 lanes, V256: applies
   Iop_CmpNEZ16x16 to |at|. */
static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 16x4 lanes held in an I64: applies
   Iop_CmpNEZ16x4 to |at|. */
static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 8x32 lanes, V256: applies
   Iop_CmpNEZ8x32 to |at|. */
static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 8x8 lanes held in an I64: applies
   Iop_CmpNEZ8x8 to |at|. */
static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
   return pessimised;
}
/* Lane-wise pessimising cast, 16x2 lanes held in an I32: applies
   Iop_CmpNEZ16x2 to |at|. */
static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* pessimised
      = assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
   return pessimised;
}
/* Per-lane pessimising cast for 4 lanes of 8 bits carried in an I32:
   applies Iop_CmpNEZ8x4 to 'at' and binds the result with
   assignNew. */
static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* lanes = assignNew('V', mce, Ity_I32,
                             unop(Iop_CmpNEZ8x4, at));
   return lanes;
}
/* Here's a simple scheme capable of handling ops derived from SSE1
code while only generating ops that can be efficiently
implemented in SSE1. */
/* All-lanes versions are straightforward:
binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
unary32Fx4(x) ==> PCast32x4(x#)
Lowest-lane-only versions are more complex:
binary32F0x4(x,y) ==> SetV128lo32(
x#,
PCast32(V128to32(UifUV128(x#,y#)))
)
This is perhaps not so obvious. In particular, it's faster to
do a V128-bit UifU and then take the bottom 32 bits than the more
obvious scheme of taking the bottom 32 bits of each operand
and doing a 32-bit UifU. Basically since UifU is fast and
chopping lanes off vector values is slow.
Finally:
unary32F0x4(x) ==> SetV128lo32(
x#,
PCast32(V128to32(x#))
)
Where:
PCast32(v#) = 1Sto32(CmpNE32(v#,0))
PCast32x4(v#) = CmpNEZ32x4(v#)
*/
/* All-lanes binary 32Fx4 shadow op:
      PCast32x4(UifUV128(vatomX, vatomY))
   Both arguments must be shadow atoms. */
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged     = mkUifUV128(mce, vatomX, vatomY);
   pessimised = mkPCast32x4(mce, merged);
   return assignNew('V', mce, Ity_V128, pessimised);
}
/* All-lanes unary 32Fx4 shadow op: PCast32x4(vatomX).  The argument
   must be a shadow atom. */
static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   pessimised = mkPCast32x4(mce, vatomX);
   return assignNew('V', mce, Ity_V128, pessimised);
}
/* Lowest-lane-only binary 32F0x4 shadow op:
      SetV128lo32( vatomX, PCast32(V128to32(UifUV128(vatomX, vatomY))) )
   The UifU is done on the full V128 and the low 32 bits are extracted
   afterwards.  Both arguments must be shadow atoms. */
static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* lo32;
   IRAtom* lo32Pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged         = mkUifUV128(mce, vatomX, vatomY);
   lo32           = assignNew('V', mce, Ity_I32,
                              unop(Iop_V128to32, merged));
   lo32Pessimised = mkPCastTo(mce, Ity_I32, lo32);
   /* Upper lanes come straight from vatomX; only the low lane is
      replaced. */
   return assignNew('V', mce, Ity_V128,
                    binop(Iop_SetV128lo32, vatomX, lo32Pessimised));
}
/* Lowest-lane-only unary 32F0x4 shadow op:
      SetV128lo32( vatomX, PCast32(V128to32(vatomX)) )
   The argument must be a shadow atom. */
static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* lo32;
   IRAtom* lo32Pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   lo32           = assignNew('V', mce, Ity_I32,
                              unop(Iop_V128to32, vatomX));
   lo32Pessimised = mkPCastTo(mce, Ity_I32, lo32);
   return assignNew('V', mce, Ity_V128,
                    binop(Iop_SetV128lo32, vatomX, lo32Pessimised));
}
/* --- ... and ... 64Fx2 versions of the same ... --- */
/* All-lanes binary 64Fx2 shadow op:
      PCast64x2(UifUV128(vatomX, vatomY))
   Both arguments must be shadow atoms. */
static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged     = mkUifUV128(mce, vatomX, vatomY);
   pessimised = mkPCast64x2(mce, merged);
   return assignNew('V', mce, Ity_V128, pessimised);
}
/* All-lanes unary 64Fx2 shadow op: PCast64x2(vatomX).  The argument
   must be a shadow atom. */
static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   pessimised = mkPCast64x2(mce, vatomX);
   return assignNew('V', mce, Ity_V128, pessimised);
}
/* Lowest-lane-only binary 64F0x2 shadow op:
      SetV128lo64( vatomX, PCast64(V128to64(UifUV128(vatomX, vatomY))) )
   Both arguments must be shadow atoms. */
static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* lo64;
   IRAtom* lo64Pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged         = mkUifUV128(mce, vatomX, vatomY);
   lo64           = assignNew('V', mce, Ity_I64,
                              unop(Iop_V128to64, merged));
   lo64Pessimised = mkPCastTo(mce, Ity_I64, lo64);
   /* The upper lane comes straight from vatomX; only the low lane is
      replaced. */
   return assignNew('V', mce, Ity_V128,
                    binop(Iop_SetV128lo64, vatomX, lo64Pessimised));
}
/* Lowest-lane-only unary 64F0x2 shadow op:
      SetV128lo64( vatomX, PCast64(V128to64(vatomX)) )
   The argument must be a shadow atom. */
static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* lo64;
   IRAtom* lo64Pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   lo64           = assignNew('V', mce, Ity_I64,
                              unop(Iop_V128to64, vatomX));
   lo64Pessimised = mkPCastTo(mce, Ity_I64, lo64);
   return assignNew('V', mce, Ity_V128,
                    binop(Iop_SetV128lo64, vatomX, lo64Pessimised));
}
/* --- --- ... and ... 32Fx2 versions of the same --- --- */
/* All-lanes binary 32Fx2 shadow op (64-bit vectors):
      PCast32x2(UifU64(vatomX, vatomY))
   Both arguments must be shadow atoms. */
static
IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged     = mkUifU64(mce, vatomX, vatomY);
   pessimised = mkPCast32x2(mce, merged);
   return assignNew('V', mce, Ity_I64, pessimised);
}
/* All-lanes unary 32Fx2 shadow op (64-bit vectors):
   PCast32x2(vatomX).  The argument must be a shadow atom. */
static
IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   pessimised = mkPCast32x2(mce, vatomX);
   return assignNew('V', mce, Ity_I64, pessimised);
}
/* --- ... and ... 64Fx4 versions of the same ... --- */
/* All-lanes binary 64Fx4 shadow op (256-bit vectors):
      PCast64x4(UifUV256(vatomX, vatomY))
   Both arguments must be shadow atoms. */
static
IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged     = mkUifUV256(mce, vatomX, vatomY);
   pessimised = mkPCast64x4(mce, merged);
   return assignNew('V', mce, Ity_V256, pessimised);
}
/* All-lanes unary 64Fx4 shadow op (256-bit vectors):
   PCast64x4(vatomX).  The argument must be a shadow atom. */
static
IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   pessimised = mkPCast64x4(mce, vatomX);
   return assignNew('V', mce, Ity_V256, pessimised);
}
/* --- ... and ... 32Fx8 versions of the same ... --- */
/* All-lanes binary 32Fx8 shadow op (256-bit vectors):
      PCast32x8(UifUV256(vatomX, vatomY))
   Both arguments must be shadow atoms. */
static
IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* merged;
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   merged     = mkUifUV256(mce, vatomX, vatomY);
   pessimised = mkPCast32x8(mce, merged);
   return assignNew('V', mce, Ity_V256, pessimised);
}
/* All-lanes unary 32Fx8 shadow op (256-bit vectors):
   PCast32x8(vatomX).  The argument must be a shadow atom. */
static
IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* pessimised;
   tl_assert(isShadowAtom(mce, vatomX));
   pessimised = mkPCast32x8(mce, vatomX);
   return assignNew('V', mce, Ity_V256, pessimised);
}
/* --- 64Fx2 binary FP ops, with rounding mode --- */
/* Binary 64Fx2 shadow op with a rounding mode.  Computes
   binary64Fx2(vatomX, vatomY), then pessimises vRM (the definedness
   of the rounding mode), widening it to 128 bits, and UifUs that into
   the result.  As with the scalar cases, a constant rounding mode is
   defined, so the extra term gets constant-folded out later. */
static
IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   /* Vector arguments first, then the rounding mode. */
   IRAtom* vecShadow = binary64Fx2(mce, vatomX, vatomY);
   IRAtom* rmShadow  = mkPCastTo(mce, Ity_V128, vRM);
   return mkUifUV128(mce, vecShadow, rmShadow);
}
/* --- ... and ... 32Fx4 versions of the same --- */
/* Binary 32Fx4 shadow op with a rounding mode: binary32Fx4(vatomX,
   vatomY) UifU'd with vRM pessimised and widened to 128 bits. */
static
IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   /* Vector arguments first, then the rounding mode. */
   IRAtom* vecShadow = binary32Fx4(mce, vatomX, vatomY);
   IRAtom* rmShadow  = mkPCastTo(mce, Ity_V128, vRM);
   return mkUifUV128(mce, vecShadow, rmShadow);
}
/* --- ... and ... 64Fx4 versions of the same --- */
/* Binary 64Fx4 shadow op with a rounding mode: binary64Fx4(vatomX,
   vatomY) UifU'd with vRM pessimised and widened to 256 bits. */
static
IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   /* Vector arguments first, then the rounding mode. */
   IRAtom* vecShadow = binary64Fx4(mce, vatomX, vatomY);
   IRAtom* rmShadow  = mkPCastTo(mce, Ity_V256, vRM);
   return mkUifUV256(mce, vecShadow, rmShadow);
}
/* --- ... and ... 32Fx8 versions of the same --- */
/* Binary 32Fx8 shadow op with a rounding mode: binary32Fx8(vatomX,
   vatomY) UifU'd with vRM pessimised and widened to 256 bits. */
static
IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   /* Vector arguments first, then the rounding mode. */
   IRAtom* vecShadow = binary32Fx8(mce, vatomX, vatomY);
   IRAtom* rmShadow  = mkPCastTo(mce, Ity_V256, vRM);
   return mkUifUV256(mce, vecShadow, rmShadow);
}
/* --- 64Fx2 unary FP ops, with rounding mode --- */
/* Unary 64Fx2 shadow op with a rounding mode: unary64Fx2(vatomX)
   UifU'd with vRM pessimised and widened to 128 bits.  Same scheme as
   binary64Fx2_w_rm. */
static
IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
{
   /* Vector argument first, then the rounding mode. */
   IRAtom* vecShadow = unary64Fx2(mce, vatomX);
   IRAtom* rmShadow  = mkPCastTo(mce, Ity_V128, vRM);
   return mkUifUV128(mce, vecShadow, rmShadow);
}
/* --- ... and ... 32Fx4 versions of the same --- */
/* Unary 32Fx4 shadow op with a rounding mode: unary32Fx4(vatomX)
   UifU'd with vRM pessimised and widened to 128 bits.  Same scheme as
   unary64Fx2_w_rm, but with 32-bit lanes. */
static
IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
{
   /* Vector argument first, then the rounding mode. */
   IRAtom* vecShadow = unary32Fx4(mce, vatomX);
   IRAtom* rmShadow  = mkPCastTo(mce, Ity_V128, vRM);
   return mkUifUV128(mce, vecShadow, rmShadow);
}
/* --- --- Vector saturated narrowing --- --- */
/* We used to do something very clever here, but on closer inspection
(2011-Jun-15), and in particular bug #279698, it turns out to be
wrong. Part of the problem came from the fact that for a long
time, the IR primops to do with saturated narrowing were
underspecified and managed to confuse multiple cases which needed
to be separate: the op names had a signedness qualifier, but in
fact the source and destination signednesses needed to be specified
independently, so the op names really need two independent
signedness specifiers.
As of 2011-Jun-15 (ish) the underspecification was sorted out
properly. The incorrect instrumentation remained, though. That
has now (2011-Oct-22) been fixed.
What we now do is simple:
Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
number of lanes, X is the source lane width and signedness, and Y
is the destination lane width and signedness. In all cases the
destination lane width is half the source lane width, so the names
have a bit of redundancy, but are at least easy to read.
For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
to unsigned 16s.
Let Vanilla(OP) be a function that takes OP, one of these
saturating narrowing ops, and produces the same "shaped" narrowing
op which is not saturating, but merely dumps the most significant
bits. "same shape" means that the lane numbers and widths are the
same as with OP.
For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
= Iop_NarrowBin32to16x8,
that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
dumping the top half of each lane.
So, with that in place, the scheme is simple, and it is simple to
pessimise each lane individually and then apply Vanilla(OP) so as
to get the result in the right "shape". If the original OP is
QNarrowBinXtoYxZ then we produce
Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
or for the case when OP is unary (Iop_QNarrowUn*)
Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
*/
/* Map a saturating narrowing op to the non-saturating ("vanilla")
   narrowing op that this file uses to shape the shadow result.
   Prints the op and panics if qnarrowOp is not one of the saturating
   narrowing ops listed below.

   NOTE(review): the two 64-to-32 binary ops are mapped to
   Iop_NarrowBin16to8x16 rather than to a 64-to-32 narrowing op.  That
   looks like it only yields fully pessimised 32-bit result lanes if
   the caller has pessimised whole 64-bit source lanes first --
   confirm against the callers before changing this mapping. */
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   IROp vanilla;
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
      case Iop_QNarrowBin64Sto32Sx4:
      case Iop_QNarrowBin64Uto32Ux4:
         vanilla = Iop_NarrowBin16to8x16;
         break;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         vanilla = Iop_NarrowBin32to16x8;
         break;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         vanilla = Iop_NarrowBin32to16x4;
         break;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         vanilla = Iop_NarrowBin16to8x8;
         break;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         vanilla = Iop_NarrowUn64to32x2;
         break;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
         vanilla = Iop_NarrowUn32to16x4;
         break;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         vanilla = Iop_NarrowUn16to8x8;
         break;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
   return vanilla;
}
/* Shadow computation for the saturating binary narrowing ops that
   take two V128s and produce one V128.  Each argument is pessimised
   (PCast) lane-by-lane at the *source* lane width, and the
   non-saturating op given by vanillaNarrowingOpOfShape is then applied
   to put the result into the right shape.

   The PCast width has to match the source lane width because
   saturation makes every result bit depend on every bit of the source
   lane.  For the 64-to-32 ops that means 64-bit lanes: pessimising at
   32 bits would leave half of a result lane marked as defined when
   only one 32-bit half of the 64-bit source lane is undefined.

   vatom1 and vatom2 must be shadow atoms.  Panics if narrow_op is not
   one of the ops listed in the switch. */
static
IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
                              IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      /* 64-bit source lanes */
      case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast64x2; break;
      case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast64x2; break;
      /* 32-bit source lanes */
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
      /* 16-bit source lanes */
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowBinV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   /* Pessimise each source lane, then narrow to the result shape. */
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   return at3;
}
/* Shadow computation for the saturating binary narrowing ops that
   take two 64-bit vectors and produce one 64-bit vector.  Each
   argument is pessimised lane-by-lane, then the non-saturating op
   given by vanillaNarrowingOpOfShape is applied to the two results.
   vatom1 and vatom2 must be shadow atoms.  Panics if narrow_op is not
   one of the ops listed in the switch. */
static
IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
                            IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom* pessimised1;
   IRAtom* pessimised2;
   IRAtom* (*laneCast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx4:
         laneCast = mkPCast32x2;
         break;
      case Iop_QNarrowBin16Sto8Sx8:
      case Iop_QNarrowBin16Sto8Ux8:
         laneCast = mkPCast16x4;
         break;
      default:
         VG_(tool_panic)("vectorNarrowBin64");
   }
   IROp shapeOp = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   pessimised1 = assignNew('V', mce, Ity_I64, laneCast(mce, vatom1));
   pessimised2 = assignNew('V', mce, Ity_I64, laneCast(mce, vatom2));
   return assignNew('V', mce, Ity_I64,
                    binop(shapeOp, pessimised1, pessimised2));
}
/* Shadow computation for unary narrowing ops taking a V128 and
   producing a 64-bit vector.  vatom1 must be a shadow atom.

   Non-saturating ("vanilla") narrowing ops are applied directly to
   the V bits.  Saturating ops are handled by pessimising each source
   lane first and then applying the non-saturating op given by
   vanillaNarrowingOpOfShape.  Panics on any other op. */
static
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
                             IRAtom* vatom1)
{
   IRAtom* pessimised;
   IRAtom* (*laneCast)( MCEnv*, IRAtom* );
   tl_assert(isShadowAtom(mce,vatom1));

   /* Vanilla narrowing: the op itself can be applied to the V bits. */
   if (narrow_op == Iop_NarrowUn16to8x8
       || narrow_op == Iop_NarrowUn32to16x4
       || narrow_op == Iop_NarrowUn64to32x2) {
      return assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
   }

   /* Plan B: ops that saturate their args need a PCast at the source
      lane width before the vanilla narrow. */
   switch (narrow_op) {
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
         laneCast = mkPCast16x8;
         break;
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
         laneCast = mkPCast32x4;
         break;
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         laneCast = mkPCast64x2;
         break;
      default:
         VG_(tool_panic)("vectorNarrowUnV128");
   }
   IROp shapeOp = vanillaNarrowingOpOfShape(narrow_op);
   pessimised = assignNew('V', mce, Ity_V128, laneCast(mce, vatom1));
   return assignNew('V', mce, Ity_I64, unop(shapeOp, pessimised));
}
/* Shadow computation for the lane-widening ops that take a 64-bit
   vector and produce a V128.  The widening op itself is applied to
   the shadow value, and the widened result is then pessimised
   lane-by-lane at the destination lane width.  vatom1 must be a shadow
   atom.  Panics if longen_op is not one of the ops listed in the
   switch. */
static
IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
                         IRAtom* vatom1)
{
   IRAtom* widened;
   IRAtom* (*laneCast)( MCEnv*, IRAtom* );
   switch (longen_op) {
      case Iop_Widen8Uto16x8:
      case Iop_Widen8Sto16x8:
         laneCast = mkPCast16x8;
         break;
      case Iop_Widen16Uto32x4:
      case Iop_Widen16Sto32x4:
         laneCast = mkPCast32x4;
         break;
      case Iop_Widen32Uto64x2:
      case Iop_Widen32Sto64x2:
         laneCast = mkPCast64x2;
         break;
      default:
         VG_(tool_panic)("vectorWidenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   widened = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   return assignNew('V', mce, Ity_V128, laneCast(mce, widened));
}
/* --- --- Vector integer arithmetic --- --- */
/* Simple ... UifU the args and per-lane pessimise the results. */
/* --- V256-bit versions --- */
static
IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
IRAtom* at;
at = mkUifUV256(mce, vatom1, vatom2);
at = mkPCast8x32(mce, at);
return at;