/* -*- mode: C; c-basic-offset: 3; -*- */
/*---------------------------------------------------------------*/
/*--- begin ir_opt.c ---*/
/*---------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2004-2013 OpenWorks LLP
info@open-works.net
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"
/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0
/* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
#define STATS_IROPT 0
/* What iropt does, 29 Dec 04.
It takes an IRSB and produces a new one with the same meaning,
defined thus:
After execution of the new BB, all guest state and guest memory is
the same as after execution of the original. This is true
regardless of how the block was exited (at the end vs side exit).
In addition, parts of the guest state will be identical to that
created by execution of the original at the following observation
points:
* In a dirty helper call, any parts of the guest state that the
helper states that it reads or modifies will be up to date.
Also, guest memory will be up to date. Parts of the guest state
not marked as being read or modified by the helper cannot be
assumed to be up-to-date at the point where the helper is called.
* If iropt_register_updates == VexRegUpdSpAtMemAccess :
The guest state is up to date only as explained above
(i.e. at SB exits and as specified by dirty helper call).
Also, the stack pointer register is up to date at memory
exception points (as this is needed for the stack extension
logic in m_signals.c).
* If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
Immediately prior to any load or store, those parts of the guest
state marked as requiring precise exceptions will be up to date.
Also, guest memory will be up to date. Parts of the guest state
not marked as requiring precise exceptions cannot be assumed to
be up-to-date at the point of the load/store.
* If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
Same as minimal, but all the guest state is up to date at memory
exception points.
* If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
Guest state is up to date at each instruction.
The relative order of loads and stores (including loads/stores of
guest memory done by dirty helpers annotated as such) is not
changed. However, the relative order of loads with no intervening
stores/modifies may be changed.
Transformation order
~~~~~~~~~~~~~~~~~~~~
There are three levels of optimisation, controlled by
vex_control.iropt_level. Define first:
"Cheap transformations" are the following sequence:
* Redundant-Get removal
* Redundant-Put removal
* Constant propagation/folding
* Dead code removal
* Specialisation of clean helper functions
* Dead code removal
"Expensive transformations" are the following sequence:
* CSE
* Folding of add/sub chains
* Redundant-GetI removal
* Redundant-PutI removal
* Dead code removal
Then the transformations are as follows, as defined by
vex_control.iropt_level:
Level 0:
* Flatten into atomic form.
Level 1: the following sequence:
* Flatten into atomic form.
* Cheap transformations.
Level 2: the following sequence:
* Flatten into atomic form.
* Cheap transformations.
* If block contains any floating or vector types, CSE.
* If block contains GetI or PutI, Expensive transformations.
* Try unrolling loops. Three possible outcomes:
- No effect: do nothing more.
- Unrolled a loop, and block does not contain GetI or PutI:
Do: * CSE
* Dead code removal
- Unrolled a loop, and block contains GetI or PutI:
Do: * Expensive transformations
* Cheap transformations
*/
/* Implementation notes, 29 Dec 04.
TODO (important): I think rPutI removal ignores precise exceptions
and is therefore, in a sense, wrong: it assumes that PutIs do not
write parts of the guest state that we need to have up-to-date at
loads/stores. So far, on the x86 guest, that has not mattered,
since only the x87 FP registers and tags are
accessed using GetI/PutI, and there is no need so far for them to
be up to date at mem exception points. The rPutI pass should be
fixed.
TODO: improve pessimistic handling of precise exceptions
in the tree builder.
TODO: check interaction of rGetI and dirty helpers.
F64i constants are treated differently from other constants.
They are not regarded as atoms, and instead lifted off and
bound to temps. This allows them to participate in CSE, which
is important for getting good performance for x86 guest code.
CSE up F64 literals (already doing F64is)
CSE: consider carefully the requirement for precise exns
prior to making CSE any more aggressive. */
/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort ---*/
/*---------------------------------------------------------------*/
/* General map from HWord-sized thing to HWord-sized thing. Could be by
hashing, but it's not clear whether or not this would really be any
faster. */
typedef
struct {
Bool* inuse;
HWord* key;
HWord* val;
Int size;
Int used;
}
HashHW;
static HashHW* newHHW ( void )
{
HashHW* h = LibVEX_Alloc(sizeof(HashHW));
h->size = 8;
h->used = 0;
h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
h->key = LibVEX_Alloc(h->size * sizeof(HWord));
h->val = LibVEX_Alloc(h->size * sizeof(HWord));
return h;
}
/* Look up key in the map. */
static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
Int i;
/* vex_printf("lookupHHW(%llx)\n", key ); */
for (i = 0; i < h->used; i++) {
if (h->inuse[i] && h->key[i] == key) {
if (val)
*val = h->val[i];
return True;
}
}
return False;
}
/* Add key->val to the map. Replaces any existing binding for key. */
static void addToHHW ( HashHW* h, HWord key, HWord val )
{
Int i, j;
/* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
/* Find and replace existing binding, if any. */
for (i = 0; i < h->used; i++) {
if (h->inuse[i] && h->key[i] == key) {
h->val[i] = val;
return;
}
}
/* Ensure a space is available. */
if (h->used == h->size) {
/* Copy into arrays twice the size. */
Bool* inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
HWord* key2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
HWord* val2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
for (i = j = 0; i < h->size; i++) {
if (!h->inuse[i]) continue;
inuse2[j] = True;
key2[j] = h->key[i];
val2[j] = h->val[i];
j++;
}
h->used = j;
h->size *= 2;
h->inuse = inuse2;
h->key = key2;
h->val = val2;
}
/* Finally, add it. */
vassert(h->used < h->size);
h->inuse[h->used] = True;
h->key[h->used] = key;
h->val[h->used] = val;
h->used++;
}
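/* Illustrative usage sketch for the helpers above (not code that is
   actually compiled anywhere; the keys and values are made-up numbers):

      HashHW* h = newHHW();
      HWord   v;
      addToHHW(h, (HWord)0x10, (HWord)0x99);        // bind 0x10 :-> 0x99
      addToHHW(h, (HWord)0x10, (HWord)0xAA);        // rebind 0x10 :-> 0xAA
      vassert(  lookupHHW(h, &v, (HWord)0x10) && v == 0xAA );
      vassert( !lookupHHW(h, NULL, (HWord)0x20) );  // never bound

   Note the map only grows; callers invalidate entries by clearing
   h->inuse[] slots directly (see invalidateOverlaps below). */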
/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form ---*/
/*---------------------------------------------------------------*/
/* Non-critical helper, heuristic for reducing the number of tmp-tmp
copies made by flattening. If in doubt return False. */
static Bool isFlat ( IRExpr* e )
{
if (e->tag == Iex_Get)
return True;
if (e->tag == Iex_Binop)
return toBool( isIRAtom(e->Iex.Binop.arg1)
&& isIRAtom(e->Iex.Binop.arg2) );
if (e->tag == Iex_Load)
return isIRAtom(e->Iex.Load.addr);
return False;
}
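/* For example: isFlat(Add32(t1,t2)) is True, since both args are
   atoms, whereas isFlat(Add32(Add32(t1,t2),t3)) is False -- the
   nested Add32 is not an atom, so the expression still needs to be
   taken apart by flatten_Expr below. */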
/* Flatten out 'ex' so it is atomic, returning a new expression with
the same value, after having appended extra IRTemp assignments to
the end of 'bb'. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
Int i;
IRExpr** newargs;
IRType ty = typeOfIRExpr(bb->tyenv, ex);
IRTemp t1;
switch (ex->tag) {
case Iex_GetI:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_GetI(ex->Iex.GetI.descr,
flatten_Expr(bb, ex->Iex.GetI.ix),
ex->Iex.GetI.bias)));
return IRExpr_RdTmp(t1);
case Iex_Get:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb,
IRStmt_WrTmp(t1, ex));
return IRExpr_RdTmp(t1);
case Iex_Qop: {
IRQop* qop = ex->Iex.Qop.details;
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_Qop(qop->op,
flatten_Expr(bb, qop->arg1),
flatten_Expr(bb, qop->arg2),
flatten_Expr(bb, qop->arg3),
flatten_Expr(bb, qop->arg4))));
return IRExpr_RdTmp(t1);
}
case Iex_Triop: {
IRTriop* triop = ex->Iex.Triop.details;
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_Triop(triop->op,
flatten_Expr(bb, triop->arg1),
flatten_Expr(bb, triop->arg2),
flatten_Expr(bb, triop->arg3))));
return IRExpr_RdTmp(t1);
}
case Iex_Binop:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_Binop(ex->Iex.Binop.op,
flatten_Expr(bb, ex->Iex.Binop.arg1),
flatten_Expr(bb, ex->Iex.Binop.arg2))));
return IRExpr_RdTmp(t1);
case Iex_Unop:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_Unop(ex->Iex.Unop.op,
flatten_Expr(bb, ex->Iex.Unop.arg))));
return IRExpr_RdTmp(t1);
case Iex_Load:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_Load(ex->Iex.Load.end,
ex->Iex.Load.ty,
flatten_Expr(bb, ex->Iex.Load.addr))));
return IRExpr_RdTmp(t1);
case Iex_CCall:
newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
for (i = 0; newargs[i]; i++)
newargs[i] = flatten_Expr(bb, newargs[i]);
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_CCall(ex->Iex.CCall.cee,
ex->Iex.CCall.retty,
newargs)));
return IRExpr_RdTmp(t1);
case Iex_ITE:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
flatten_Expr(bb, ex->Iex.ITE.iftrue),
flatten_Expr(bb, ex->Iex.ITE.iffalse))));
return IRExpr_RdTmp(t1);
case Iex_Const:
/* Lift F64i constants out onto temps so they can be CSEd
later. */
if (ex->Iex.Const.con->tag == Ico_F64i) {
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
IRExpr_Const(ex->Iex.Const.con)));
return IRExpr_RdTmp(t1);
} else {
/* Leave all other constants alone. */
return ex;
}
case Iex_RdTmp:
return ex;
default:
vex_printf("\n");
ppIRExpr(ex);
vex_printf("\n");
vpanic("flatten_Expr");
}
}
/* Append a completely flattened form of 'st' to the end of 'bb'. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
Int i;
IRExpr *e1, *e2, *e3, *e4, *e5;
IRDirty *d, *d2;
IRCAS *cas, *cas2;
IRPutI *puti, *puti2;
IRLoadG *lg;
IRStoreG *sg;
switch (st->tag) {
case Ist_Put:
if (isIRAtom(st->Ist.Put.data)) {
/* optimisation to reduce the amount of heap wasted
by the flattener */
addStmtToIRSB(bb, st);
} else {
/* general case, always correct */
e1 = flatten_Expr(bb, st->Ist.Put.data);
addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
}
break;
case Ist_PutI:
puti = st->Ist.PutI.details;
e1 = flatten_Expr(bb, puti->ix);
e2 = flatten_Expr(bb, puti->data);
puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
addStmtToIRSB(bb, IRStmt_PutI(puti2));
break;
case Ist_WrTmp:
if (isFlat(st->Ist.WrTmp.data)) {
/* optimisation, to reduce the number of tmp-tmp
copies generated */
addStmtToIRSB(bb, st);
} else {
/* general case, always correct */
e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
}
break;
case Ist_Store:
e1 = flatten_Expr(bb, st->Ist.Store.addr);
e2 = flatten_Expr(bb, st->Ist.Store.data);
addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
break;
case Ist_StoreG:
sg = st->Ist.StoreG.details;
e1 = flatten_Expr(bb, sg->addr);
e2 = flatten_Expr(bb, sg->data);
e3 = flatten_Expr(bb, sg->guard);
addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
break;
case Ist_LoadG:
lg = st->Ist.LoadG.details;
e1 = flatten_Expr(bb, lg->addr);
e2 = flatten_Expr(bb, lg->alt);
e3 = flatten_Expr(bb, lg->guard);
addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
e1, e2, e3));
break;
case Ist_CAS:
cas = st->Ist.CAS.details;
e1 = flatten_Expr(bb, cas->addr);
e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
e3 = flatten_Expr(bb, cas->expdLo);
e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
e5 = flatten_Expr(bb, cas->dataLo);
cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
e1, e2, e3, e4, e5 );
addStmtToIRSB(bb, IRStmt_CAS(cas2));
break;
case Ist_LLSC:
e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
e2 = st->Ist.LLSC.storedata
? flatten_Expr(bb, st->Ist.LLSC.storedata)
: NULL;
addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
st->Ist.LLSC.result, e1, e2));
break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
d2 = emptyIRDirty();
*d2 = *d;
d2->args = shallowCopyIRExprVec(d2->args);
if (d2->mFx != Ifx_None) {
d2->mAddr = flatten_Expr(bb, d2->mAddr);
} else {
vassert(d2->mAddr == NULL);
}
d2->guard = flatten_Expr(bb, d2->guard);
for (i = 0; d2->args[i]; i++) {
IRExpr* arg = d2->args[i];
if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
d2->args[i] = flatten_Expr(bb, arg);
}
addStmtToIRSB(bb, IRStmt_Dirty(d2));
break;
case Ist_NoOp:
case Ist_MBE:
case Ist_IMark:
addStmtToIRSB(bb, st);
break;
case Ist_AbiHint:
e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
break;
case Ist_Exit:
e1 = flatten_Expr(bb, st->Ist.Exit.guard);
addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
st->Ist.Exit.dst,
st->Ist.Exit.offsIP));
break;
default:
vex_printf("\n");
ppIRStmt(st);
vex_printf("\n");
vpanic("flatten_Stmt");
}
}
static IRSB* flatten_BB ( IRSB* in )
{
Int i;
IRSB* out;
out = emptyIRSB();
out->tyenv = deepCopyIRTypeEnv( in->tyenv );
for (i = 0; i < in->stmts_used; i++)
if (in->stmts[i])
flatten_Stmt( out, in->stmts[i] );
out->next = flatten_Expr( out, in->next );
out->jumpkind = in->jumpkind;
out->offsIP = in->offsIP;
return out;
}
/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs ---*/
/*---------------------------------------------------------------*/
/* Scan forwards, building up an environment binding (min offset, max
offset) pairs to values, which will either be temps or constants.
On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
env and if it matches, replace the Get with the stored value. If
there is no match, add a (minoff,maxoff) :-> t binding.
On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
any binding which fully or partially overlaps with (minoff,maxoff).
Then add a new (minoff,maxoff) :-> t or c binding. */
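/* For example (a sketch only; offsets and temps are invented, and we
   assume the I32 slot at offset 24 occupies bytes 24..27):

      t1 = GET:I32(24)     -- binds (24,27) :-> t1
      t2 = GET:I32(24)     -- rewritten to  t2 = t1
      PUT(24) = t3         -- drops the old binding, binds (24,27) :-> t3
      t4 = GET:I32(24)     -- rewritten to  t4 = t3
      PUT(26) = t5         -- t5 :: I16; partial overlap, so the
                              (24,27) binding is dropped
      t6 = GET:I32(24)     -- not rewritten
*/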
/* Extract the min/max offsets from a guest state array descriptor. */
inline
static void getArrayBounds ( IRRegArray* descr,
UInt* minoff, UInt* maxoff )
{
*minoff = descr->base;
*maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
vassert((*minoff & ~0xFFFF) == 0);
vassert((*maxoff & ~0xFFFF) == 0);
vassert(*minoff <= *maxoff);
}
/* Create keys, of the form ((minoffset << 16) | maxoffset). */
static UInt mk_key_GetPut ( Int offset, IRType ty )
{
/* offset should fit in 16 bits. */
UInt minoff = offset;
UInt maxoff = minoff + sizeofIRType(ty) - 1;
vassert((minoff & ~0xFFFF) == 0);
vassert((maxoff & ~0xFFFF) == 0);
return (minoff << 16) | maxoff;
}
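/* For example (assuming sizeofIRType(Ity_I32) == 4):
   mk_key_GetPut(24, Ity_I32) describes bytes 24..27 and so yields
   key = (24 << 16) | 27 = 0x0018001B. */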
static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
UInt minoff, maxoff;
getArrayBounds( descr, &minoff, &maxoff );
vassert((minoff & ~0xFFFF) == 0);
vassert((maxoff & ~0xFFFF) == 0);
return (minoff << 16) | maxoff;
}
/* Supposing h has keys of the form generated by mk_key_GetPut and
mk_key_GetIPutI, invalidate any key which overlaps (k_lo
.. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
Int j;
UInt e_lo, e_hi;
vassert(k_lo <= k_hi);
/* invalidate any env entries which in any way overlap (k_lo
.. k_hi) */
/* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
for (j = 0; j < h->used; j++) {
if (!h->inuse[j])
continue;
e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
e_hi = ((UInt)h->key[j]) & 0xFFFF;
vassert(e_lo <= e_hi);
if (e_hi < k_lo || k_hi < e_lo)
continue; /* no overlap possible */
else
/* overlap; invalidate */
h->inuse[j] = False;
}
}
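/* For example, given an entry with key 0x0018001B (bytes 24..27):
   invalidateOverlaps(h, 26, 29) marks it as no longer in use, since
   the ranges overlap, whereas invalidateOverlaps(h, 28, 31) leaves
   it alone (27 < 28, so no overlap is possible). */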
static void redundant_get_removal_BB ( IRSB* bb )
{
HashHW* env = newHHW();
UInt key = 0; /* keep gcc -O happy */
Int i, j;
HWord val;
for (i = 0; i < bb->stmts_used; i++) {
IRStmt* st = bb->stmts[i];
if (st->tag == Ist_NoOp)
continue;
/* Deal with Gets */
if (st->tag == Ist_WrTmp
&& st->Ist.WrTmp.data->tag == Iex_Get) {
/* st is 't = Get(...)'. Look up in the environment and see
if the Get can be replaced. */
IRExpr* get = st->Ist.WrTmp.data;
key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
get->Iex.Get.ty );
if (lookupHHW(env, &val, (HWord)key)) {
/* found it */
/* Note, we could do better here. If the types are
different we don't do the substitution, since doing so
could lead to invalidly-typed IR. An improvement would
be to stick in a reinterpret-style cast, although that
would make maintaining flatness more difficult. */
IRExpr* valE = (IRExpr*)val;
Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
== st->Ist.WrTmp.data->Iex.Get.ty );
if (typesOK && DEBUG_IROPT) {
vex_printf("rGET: "); ppIRExpr(get);
vex_printf(" -> "); ppIRExpr(valE);
vex_printf("\n");
}
if (typesOK)
bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
} else {
/* Not found, but at least we know that t and the Get(...)
are now associated. So add a binding to reflect that
fact. */
addToHHW( env, (HWord)key,
(HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
}
}
/* Deal with Puts: invalidate any env entries overlapped by this
Put */
if (st->tag == Ist_Put || st->tag == Ist_PutI) {
UInt k_lo, k_hi;
if (st->tag == Ist_Put) {
key = mk_key_GetPut( st->Ist.Put.offset,
typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
} else {
vassert(st->tag == Ist_PutI);
key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
}
k_lo = (key >> 16) & 0xFFFF;
k_hi = key & 0xFFFF;
invalidateOverlaps(env, k_lo, k_hi);
}
else
if (st->tag == Ist_Dirty) {
/* Deal with dirty helpers which write or modify guest state.
Invalidate the entire env. We could do a lot better
here. */
IRDirty* d = st->Ist.Dirty.details;
Bool writes = False;
for (j = 0; j < d->nFxState; j++) {
if (d->fxState[j].fx == Ifx_Modify
|| d->fxState[j].fx == Ifx_Write)
writes = True;
}
if (writes) {
/* dump the entire env (not clever, but correct ...) */
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
if (0) vex_printf("rGET: trash env due to dirty helper\n");
}
}
/* add this one to the env, if appropriate */
if (st->tag == Ist_Put) {
vassert(isIRAtom(st->Ist.Put.data));
addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
}
} /* for (i = 0; i < bb->stmts_used; i++) */
}
/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs ---*/
/*---------------------------------------------------------------*/
/* Find any Get uses in st and invalidate any partially or fully
overlapping ranges listed in env. Due to the flattening phase, the
only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
static void handle_gets_Stmt (
HashHW* env,
IRStmt* st,
Bool (*preciseMemExnsFn)(Int,Int)
)
{
Int j;
UInt key = 0; /* keep gcc -O happy */
Bool isGet;
Bool memRW = False;
IRExpr* e;
switch (st->tag) {
/* This is the only interesting case. Deal with Gets in the RHS
expression. */
case Ist_WrTmp:
e = st->Ist.WrTmp.data;
switch (e->tag) {
case Iex_Get:
isGet = True;
key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
break;
case Iex_GetI:
isGet = True;
key = mk_key_GetIPutI ( e->Iex.GetI.descr );
break;
case Iex_Load:
isGet = False;
memRW = True;
break;
default:
isGet = False;
}
if (isGet) {
UInt k_lo, k_hi;
k_lo = (key >> 16) & 0xFFFF;
k_hi = key & 0xFFFF;
invalidateOverlaps(env, k_lo, k_hi);
}
break;
/* Be very conservative for dirty helper calls; dump the entire
environment. The helper might read guest state, in which
case it needs to be flushed first. Also, the helper might
access guest memory, in which case all parts of the guest
state requiring precise exceptions need to be flushed. The
crude solution is just to flush everything; we could easily
enough do a lot better if needed. */
/* Probably also overly-conservative, but also dump everything
if we hit a memory bus event (fence, lock, unlock). Ditto
AbiHints, CASs, LLs and SCs. */
case Ist_AbiHint:
vassert(isIRAtom(st->Ist.AbiHint.base));
vassert(isIRAtom(st->Ist.AbiHint.nia));
/* fall through */
case Ist_MBE:
case Ist_Dirty:
case Ist_CAS:
case Ist_LLSC:
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
break;
/* all other cases are boring. */
case Ist_Store:
vassert(isIRAtom(st->Ist.Store.addr));
vassert(isIRAtom(st->Ist.Store.data));
memRW = True;
break;
case Ist_StoreG: {
IRStoreG* sg = st->Ist.StoreG.details;
vassert(isIRAtom(sg->addr));
vassert(isIRAtom(sg->data));
vassert(isIRAtom(sg->guard));
memRW = True;
break;
}
case Ist_LoadG: {
IRLoadG* lg = st->Ist.LoadG.details;
vassert(isIRAtom(lg->addr));
vassert(isIRAtom(lg->alt));
vassert(isIRAtom(lg->guard));
memRW = True;
break;
}
case Ist_Exit:
vassert(isIRAtom(st->Ist.Exit.guard));
break;
case Ist_Put:
vassert(isIRAtom(st->Ist.Put.data));
break;
case Ist_PutI:
vassert(isIRAtom(st->Ist.PutI.details->ix));
vassert(isIRAtom(st->Ist.PutI.details->data));
break;
case Ist_NoOp:
case Ist_IMark:
break;
default:
vex_printf("\n");
ppIRStmt(st);
vex_printf("\n");
vpanic("handle_gets_Stmt");
}
if (memRW) {
/* This statement accesses memory. So we might need to dump all parts
of the environment corresponding to guest state that may not
be reordered with respect to memory references. That means
at least the stack pointer. */
switch (vex_control.iropt_register_updates) {
case VexRegUpdAllregsAtMemAccess:
/* Precise exceptions required at mem access.
Flush all guest state. */
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
break;
case VexRegUpdSpAtMemAccess:
/* We need to dump the stack pointer
(needed for stack extension in m_signals.c).
preciseMemExnsFn will use vex_control.iropt_register_updates
to verify only the sp is to be checked. */
/* fallthrough */
case VexRegUpdUnwindregsAtMemAccess:
for (j = 0; j < env->used; j++) {
if (!env->inuse[j])
continue;
/* Just flush the minimal amount required, as computed by
preciseMemExnsFn. */
HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
HWord k_hi = env->key[j] & 0xFFFF;
if (preciseMemExnsFn( k_lo, k_hi ))
env->inuse[j] = False;
}
break;
case VexRegUpdAllregsAtEachInsn:
// VexRegUpdAllregsAtEachInsn cannot happen here.
// fall through
default:
vassert(0);
}
} /* if (memRW) */
}
/* Scan backwards, building up a set of (min offset, max
offset) pairs, indicating those parts of the guest state
for which the next event is a write.
On seeing a conditional exit, empty the set.
On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
completely within the set, remove the Put. Otherwise, add
(minoff,maxoff) to the set.
On seeing 'Get (minoff,maxoff)', remove any part of the set
overlapping (minoff,maxoff). The same has to happen for any events
which implicitly read parts of the guest state: dirty helper calls
and loads/stores.
*/
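/* For example (a sketch only; offsets and temps are invented):

      PUT(24) = t1         -- removed: overwritten by the PUT below,
                              with no intervening read of (24..27)
      PUT(24) = t2         -- kept: the GET below reads (24..27)
      t3 = GET:I32(24)
      PUT(24) = t4         -- kept: nothing later overwrites (24..27)

   Remember the scan is backwards, so the set always describes what
   happens after the statement currently being examined. */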
static void redundant_put_removal_BB (
IRSB* bb,
Bool (*preciseMemExnsFn)(Int,Int)
)
{
Int i, j;
Bool isPut;
IRStmt* st;
UInt key = 0; /* keep gcc -O happy */
vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);
HashHW* env = newHHW();
/* Initialise the running env with the fact that the final exit
writes the IP (or, whatever it claims to write. We don't
care.) */
key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
addToHHW(env, (HWord)key, 0);
/* And now scan backwards through the statements. */
for (i = bb->stmts_used-1; i >= 0; i--) {
st = bb->stmts[i];
if (st->tag == Ist_NoOp)
continue;
/* Deal with conditional exits. */
if (st->tag == Ist_Exit) {
//Bool re_add;
/* Need to throw out from the env, any part of it which
doesn't overlap with the guest state written by this exit.
Since the exit only writes one section, it's simplest to
do this: (1) check whether env contains a write that
completely overlaps the write done by this exit; (2) empty
out env; and (3) if (1) was true, add the write done by
this exit.
To make (1) a bit simpler, merely search for a write that
exactly matches the one done by this exit. That's safe
because it will fail as often or more often than a full
overlap check, and failure to find an overlapping write in
env is the safe case (we just nuke env if that
happens). */
//vassert(isIRAtom(st->Ist.Exit.guard));
/* (1) */
//key = mk_key_GetPut(st->Ist.Exit.offsIP,
// typeOfIRConst(st->Ist.Exit.dst));
//re_add = lookupHHW(env, NULL, key);
/* (2) */
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
/* (3) */
//if (0 && re_add)
// addToHHW(env, (HWord)key, 0);
continue;
}
/* Deal with Puts */
switch (st->tag) {
case Ist_Put:
isPut = True;
key = mk_key_GetPut( st->Ist.Put.offset,
typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
vassert(isIRAtom(st->Ist.Put.data));
break;
case Ist_PutI:
isPut = True;
key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
vassert(isIRAtom(st->Ist.PutI.details->ix));
vassert(isIRAtom(st->Ist.PutI.details->data));
break;
default:
isPut = False;
}
if (isPut && st->tag != Ist_PutI) {
/* See if any single entry in env overlaps this Put. This is
simplistic in that the transformation is valid if, say, two
or more entries in the env overlap this Put, but the use of
lookupHHW will only find a single entry which exactly
overlaps this Put. This is suboptimal but safe. */
if (lookupHHW(env, NULL, (HWord)key)) {
/* This Put is redundant because a later one will overwrite
it. So NULL (nop) it out. */
if (DEBUG_IROPT) {
vex_printf("rPUT: "); ppIRStmt(st);
vex_printf("\n");
}
bb->stmts[i] = IRStmt_NoOp();
} else {
/* We can't demonstrate that this Put is redundant, so add it
to the running collection. */
addToHHW(env, (HWord)key, 0);
}
continue;
}
/* Deal with Gets. These remove bits of the environment since
appearance of a Get means that the next event for that slice
of the guest state is no longer a write, but a read. Also
deals with implicit reads of guest state needed to maintain
precise exceptions. */
handle_gets_Stmt( env, st, preciseMemExnsFn );
}
}
/*---------------------------------------------------------------*/
/*--- Constant propagation and folding ---*/
/*---------------------------------------------------------------*/
#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */
/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;
/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
This is to guard against performance degradation by visiting large
trees without success. */
#define NODE_LIMIT 30
/* The env in this section is a map from IRTemp to IRExpr*,
that is, an array indexed by IRTemp. */
/* Do both expressions compute the same value? The answer is generally
conservative, i.e. it will report that the expressions do not compute
the same value when in fact they do. The reason is that we do not
keep track of changes in the guest state and memory. Thus, two
Gets, GetIs or Loads, even when accessing the same location, will be
assumed to compute different values. After all, the accesses may happen
at different times and the guest state / memory can have changed in
the meantime.
XXX IMPORTANT XXX the two expressions must have the same IR type.
DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
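/* For instance (illustration only): given  t1 = GET:I32(16)  and
   t2 = GET:I32(16), sameIRExprs(env, RdTmp(t1), RdTmp(t2)) answers
   False even though the offsets match, whereas comparing
   Add32(RdTmp(t3),0x1:I32) against an identical Add32 tree answers
   True (same temp, same constant). */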
/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
wrapper that deals with the common tags-don't-match case, and a
slower out of line general case. Saves a few insns. */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
if (e1->tag != e2->tag) return False;
return sameIRExprs_aux2(env, e1, e2);
}
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
if (num_nodes_visited++ > NODE_LIMIT) return False;
switch (e1->tag) {
case Iex_RdTmp:
if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;
if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
recursed = True;
if (same) recursion_helped = True;
#endif
return same;
}
return False;
case Iex_Get:
case Iex_GetI:
case Iex_Load:
/* Guest state / memory could have changed in the meantime. */
return False;
case Iex_Binop:
return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
&& sameIRExprs_aux( env, e1->Iex.Binop.arg1,
e2->Iex.Binop.arg1 )
&& sameIRExprs_aux( env, e1->Iex.Binop.arg2,
e2->Iex.Binop.arg2 ));
case Iex_Unop:
return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
&& sameIRExprs_aux( env, e1->Iex.Unop.arg,
e2->Iex.Unop.arg ));
case Iex_Const: {
IRConst *c1 = e1->Iex.Const.con;
IRConst *c2 = e2->Iex.Const.con;
vassert(c1->tag == c2->tag);
switch (c1->tag) {
case Ico_U1: return toBool( c1->Ico.U1 == c2->Ico.U1 );
case Ico_U8: return toBool( c1->Ico.U8 == c2->Ico.U8 );
case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
default: break;
}
return False;
}
case Iex_Triop: {
IRTriop *tri1 = e1->Iex.Triop.details;
IRTriop *tri2 = e2->Iex.Triop.details;
return toBool( tri1->op == tri2->op
&& sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
&& sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
&& sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
}
case Iex_ITE:
return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond,
e2->Iex.ITE.cond )
&& sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
e2->Iex.ITE.iftrue )
&& sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
e2->Iex.ITE.iffalse ));
default:
/* Not very likely to be "same". */
break;
}
return False;
}
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
Bool same;
num_nodes_visited = 0;
same = sameIRExprs_aux(env, e1, e2);
#if STATS_IROPT
++invocation_count;
if (recursed) ++recursion_count;
success_count += same;
if (same && recursion_helped)
++recursion_success_count;
if (num_nodes_visited > max_nodes_visited)
max_nodes_visited = num_nodes_visited;
recursed = False; /* reset */
recursion_helped = False; /* reset */
#endif /* STATS_IROPT */
return same;
}
/* Debugging-only hack (not used in production runs): make a guess
whether sameIRExprs might assert due to the two args being of
different types. If in doubt return False. Is only used when
--vex-iropt-verbosity > 0, that is, vex_control.iropt_verbosity > 0.
Bad because it duplicates functionality from typeOfIRExpr. See
comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
if (e1->tag != e2->tag) return False;
switch (e1->tag) {
case Iex_Const: {
/* The only interesting case */
IRConst *c1 = e1->Iex.Const.con;
IRConst *c2 = e2->Iex.Const.con;
return c1->tag != c2->tag;
}
default:
break;
}
return False;
}
/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
return toBool( e->tag == Iex_Const
&& e->Iex.Const.con->tag == Ico_U32
&& e->Iex.Const.con->Ico.U32 == 0);
}
/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
Currently unused; commented out to avoid compiler warning */
#if 0
static Bool isZeroU64 ( IRExpr* e )
{
return toBool( e->tag == Iex_Const
&& e->Iex.Const.con->tag == Ico_U64
&& e->Iex.Const.con->Ico.U64 == 0);
}
#endif
/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
{
return toBool( e->tag == Iex_Const
&& e->Iex.Const.con->tag == Ico_V128
&& e->Iex.Const.con->Ico.V128 == 0x0000);
}
/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
{
return toBool( e->tag == Iex_Const
&& e->Iex.Const.con->tag == Ico_V256
&& e->Iex.Const.con->Ico.V256 == 0x00000000);
}
/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
if (e->tag != Iex_Const) return False;
switch (e->Iex.Const.con->tag) {
case Ico_U1: return toBool( e->Iex.Const.con->Ico.U1 == 0);
case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0);
case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
default: vpanic("isZeroU");
}
}
/* Is this an integer constant with value 1---1b ? */
static Bool isOnesU ( IRExpr* e )
{
if (e->tag != Iex_Const) return False;
switch (e->Iex.Const.con->tag) {
case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0xFF);
case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
== 0xFFFFFFFF);
case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
== 0xFFFFFFFFFFFFFFFFULL);
default: ppIRExpr(e); vpanic("isOnesU");
}
}
static Bool notBool ( Bool b )
{
if (b == True) return False;
if (b == False) return True;
vpanic("notBool");
}
/* Make a zero which has the same type as the result of the given
primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
switch (op) {
case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
case Iop_Xor8: return IRExpr_Const(IRConst_U8(0));
case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
case Iop_Sub32:
case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
case Iop_And64:
case Iop_Sub64:
case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
case Iop_XorV128:
case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
default: vpanic("mkZeroOfPrimopResultType: bad primop");
}
}
/* Make a value containing all 1-bits, which has the same type as the
result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
switch (op) {
case Iop_CmpEQ32:
case Iop_CmpEQ64:
return IRExpr_Const(IRConst_U1(toBool(1)));
case Iop_Or8:
return IRExpr_Const(IRConst_U8(0xFF));
case Iop_Or16:
return IRExpr_Const(IRConst_U16(0xFFFF));
case Iop_Or32:
return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
case Iop_CmpEQ8x8:
case Iop_Or64:
return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
case Iop_CmpEQ8x16:
case Iop_CmpEQ16x8:
case Iop_CmpEQ32x4:
return IRExpr_Const(IRConst_V128(0xFFFF));
default:
ppIROp(op);
vpanic("mkOnesOfPrimopResultType: bad primop");
}
}
/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
UInt i;
vassert(value != 0ULL); /* no defined semantics for arg==0 */
for (i = 0; i < 64; ++i) {
if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
}
vassert(0);
/*NOTREACHED*/
return 0;
}
static UInt fold_Clz32 ( UInt value )
{
UInt i;
vassert(value != 0); /* no defined semantics for arg==0 */
for (i = 0; i < 32; ++i) {
if (0 != (value & (((UInt)1) << (31 - i)))) return i;
}
vassert(0);
/*NOTREACHED*/
return 0;
}
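/* For example: fold_Clz32(0x00010000) == 15, since bit 16 is the
   highest set bit and so there are 31-16 == 15 leading zeroes;
   similarly fold_Clz64(1ULL) == 63. */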
/* V64 holds 8 summary-constant bits in V128/V256 style. Convert to
the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
// ULong r = 0;
// if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
// if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
// if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
// if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
// if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
// if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
// if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
// if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
// return r;
//}
/* Helper for arbitrary expression pattern matching in flat IR. If
'e' is a reference to a tmp, look it up in env -- repeatedly, if
necessary -- until it resolves to a non-tmp. Note that this can
return NULL if it can't resolve 'e' to a new expression, which will
be the case if 'e' is instead defined by an IRStmt (IRDirty or
LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
/* Why is this loop guaranteed to terminate? Because all tmps must
have definitions before use, hence a tmp cannot be bound
(directly or indirectly) to itself. */
while (e->tag == Iex_RdTmp) {
if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
e = env[(Int)e->Iex.RdTmp.tmp];
if (e == NULL) break;
}
return e;
}
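/* For example (a sketch): if env binds t7 :-> RdTmp(t3) and
   t3 :-> Add32(t1,t2), then chase(env, RdTmp(t7)) returns the
   Add32(t1,t2) expression. If instead t3 had been set by (say) an
   LLSC, env[t3] would be NULL and chase would return NULL. */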
static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
Int shift;
IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
switch (e->tag) {
case Iex_Unop:
/* UNARY ops */
if (e->Iex.Unop.arg->tag == Iex_Const) {
switch (e->Iex.Unop.op) {
case Iop_1Uto8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 1 : 0)));
break;
case Iop_1Uto32:
e2 = IRExpr_Const(IRConst_U32(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 1 : 0));
break;
case Iop_1Uto64:
e2 = IRExpr_Const(IRConst_U64(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 1 : 0));
break;
case Iop_1Sto8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 0xFF : 0)));
break;
case Iop_1Sto16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 0xFFFF : 0)));
break;
case Iop_1Sto32:
e2 = IRExpr_Const(IRConst_U32(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 0xFFFFFFFF : 0));
break;
case Iop_1Sto64:
e2 = IRExpr_Const(IRConst_U64(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
? 0xFFFFFFFFFFFFFFFFULL : 0));
break;
case Iop_8Sto32: {
/* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
s32 <<= 24;
s32 >>= 24;
e2 = IRExpr_Const(IRConst_U32((UInt)s32));
break;
}
case Iop_16Sto32: {
/* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
s32 <<= 16;
s32 >>= 16;
e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
break;
}
case Iop_8Uto64:
e2 = IRExpr_Const(IRConst_U64(
0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
break;
case Iop_16Uto64:
e2 = IRExpr_Const(IRConst_U64(
0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
break;
case Iop_8Uto32:
e2 = IRExpr_Const(IRConst_U32(
0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
break;
case Iop_8Sto16: {
/* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
s16 <<= 8;
s16 >>= 8;
e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
break;
}
case Iop_8Uto16:
e2 = IRExpr_Const(IRConst_U16(
0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
break;
case Iop_16Uto32:
e2 = IRExpr_Const(IRConst_U32(
0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
break;
case Iop_32to16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
break;
case Iop_32to8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
break;
case Iop_32to1:
e2 = IRExpr_Const(IRConst_U1(toBool(
1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
)));
break;
case Iop_64to1:
e2 = IRExpr_Const(IRConst_U1(toBool(
1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
)));
break;
case Iop_NotV128:
e2 = IRExpr_Const(IRConst_V128(
~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
break;
case Iop_Not64:
e2 = IRExpr_Const(IRConst_U64(
~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
break;
case Iop_Not32:
e2 = IRExpr_Const(IRConst_U32(
~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
break;
case Iop_Not16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
break;
case Iop_Not8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
break;
case Iop_Not1:
e2 = IRExpr_Const(IRConst_U1(
notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
break;
case Iop_64to8: {
ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
w64 &= 0xFFULL;
e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
break;
}
case Iop_64to16: {
ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
w64 &= 0xFFFFULL;
e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
break;
}
case Iop_64to32: {
ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
w64 &= 0x00000000FFFFFFFFULL;
e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
break;
}
case Iop_64HIto32: {
ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
w64 >>= 32;
e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
break;
}
case Iop_32Uto64:
e2 = IRExpr_Const(IRConst_U64(
0xFFFFFFFFULL
& e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
break;
case Iop_16Sto64: {
/* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
s64 <<= 48;
s64 >>= 48;
e2 = IRExpr_Const(IRConst_U64((ULong)s64));
break;
}
case Iop_32Sto64: {
/* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
s64 <<= 32;
s64 >>= 32;
e2 = IRExpr_Const(IRConst_U64((ULong)s64));
break;
}
case Iop_16to8: {
UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
w16 &= 0xFF;
e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
break;
}
case Iop_16HIto8: {
UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
w16 >>= 8;
w16 &= 0xFF;
e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
break;
}
case Iop_CmpNEZ8:
e2 = IRExpr_Const(IRConst_U1(toBool(
0 !=
(0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
)));
break;
case Iop_CmpNEZ32:
e2 = IRExpr_Const(IRConst_U1(toBool(
0 !=
(0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
)));
break;
case Iop_CmpNEZ64:
e2 = IRExpr_Const(IRConst_U1(toBool(
0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
)));
break;
case Iop_CmpwNEZ32: {
UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
if (w32 == 0)
e2 = IRExpr_Const(IRConst_U32( 0 ));
else
e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
break;
}
case Iop_CmpwNEZ64: {
ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
if (w64 == 0)
e2 = IRExpr_Const(IRConst_U64( 0 ));
else
e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
break;
}
case Iop_Left32: {
UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
Int s32 = (Int)(u32 & 0xFFFFFFFF);
s32 = (s32 | (-s32));
e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
break;
}
case Iop_Left64: {
ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
Long s64 = (Long)u64;
s64 = (s64 | (-s64));
e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
break;
}
case Iop_Clz32: {
UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
if (u32 != 0)
e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
break;
}
case Iop_Clz64: {
ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
if (u64 != 0ULL)
e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
break;
}
/* For these vector ones, can't fold all cases, but at least
do the most obvious one. Could do better here using
summarise/desummarise of vector constants, but too
difficult to verify; hence just handle the zero cases. */
case Iop_32UtoV128: {
UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
if (0 == u32) {
e2 = IRExpr_Const(IRConst_V128(0x0000));
} else {
goto unhandled;
}
break;
}
case Iop_V128to64: {
UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
if (0 == ((v128 >> 0) & 0xFF)) {
e2 = IRExpr_Const(IRConst_U64(0));
} else {
goto unhandled;
}
break;
}
case Iop_V128HIto64: {
UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
if (0 == ((v128 >> 8) & 0xFF)) {
e2 = IRExpr_Const(IRConst_U64(0));
} else {
goto unhandled;
}
break;
}
case Iop_64UtoV128: {
ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
if (0 == u64) {
e2 = IRExpr_Const(IRConst_V128(0x0000));
} else {
goto unhandled;
}
break;
}
/* Even stupider (although still correct ..) */
case Iop_V256to64_0: case Iop_V256to64_1:
case Iop_V256to64_2: case Iop_V256to64_3: {
UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
if (v256 == 0x00000000) {
e2 = IRExpr_Const(IRConst_U64(0));
} else {
goto unhandled;
}
break;
}
case Iop_ZeroHI64ofV128: {
/* Could do better here -- only need to look at the bottom 64 bits
of the argument, really. */
UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
if (v128 == 0x0000) {
e2 = IRExpr_Const(IRConst_V128(0x0000));
} else {
goto unhandled;
}
break;
}
default:
goto unhandled;
}
}
break;
case Iex_Binop:
/* BINARY ops */
if (e->Iex.Binop.arg1->tag == Iex_Const
&& e->Iex.Binop.arg2->tag == Iex_Const) {
/* cases where both args are consts */
switch (e->Iex.Binop.op) {
/* -- Or -- */
case Iop_Or8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
| e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
break;
case Iop_Or16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
| e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
break;
case Iop_Or32:
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
| e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
break;
case Iop_Or64:
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
| e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
break;
case Iop_OrV128:
e2 = IRExpr_Const(IRConst_V128(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
| e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
break;
/* -- Xor -- */
case Iop_Xor8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
break;
case Iop_Xor16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
break;
case Iop_Xor32:
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
break;
case Iop_Xor64:
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
break;
case Iop_XorV128:
e2 = IRExpr_Const(IRConst_V128(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
break;
/* -- And -- */
case Iop_And8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
& e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
break;
case Iop_And16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
& e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
break;
case Iop_And32:
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
& e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
break;
case Iop_And64:
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
& e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
break;
case Iop_AndV128:
e2 = IRExpr_Const(IRConst_V128(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
& e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
break;
/* -- Add -- */
case Iop_Add8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
+ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
break;
case Iop_Add32:
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
break;
case Iop_Add64:
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
break;
/* -- Sub -- */
case Iop_Sub8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
- e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
break;
case Iop_Sub32:
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
- e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
break;
case Iop_Sub64:
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
- e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
break;
/* -- Max32U -- */
case Iop_Max32U: {
UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
UInt res = u32a > u32b ? u32a : u32b;
e2 = IRExpr_Const(IRConst_U32(res));
break;
}
/* -- Mul -- */
case Iop_Mul32:
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
* e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
break;
case Iop_Mul64:
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
* e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
break;
case Iop_MullS32: {
/* very paranoid */
UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
Int s32a = (Int)u32a;
Int s32b = (Int)u32b;
Long s64a = (Long)s32a;
Long s64b = (Long)s32b;
Long sres = s64a * s64b;
ULong ures = (ULong)sres;
e2 = IRExpr_Const(IRConst_U64(ures));
break;
}
/* -- Shl -- */
case Iop_Shl32:
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
if (shift >= 0 && shift <= 31)
e2 = IRExpr_Const(IRConst_U32(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
<< shift)));
break;
case Iop_Shl64:
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
if (shift >= 0 && shift <= 63)
e2 = IRExpr_Const(IRConst_U64(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
<< shift)));
break;
/* -- Sar -- */
case Iop_Sar32: {
/* paranoid ... */
/*signed*/ Int s32;
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
if (shift >= 0 && shift <= 31) {
s32 >>=/*signed*/ shift;
e2 = IRExpr_Const(IRConst_U32((UInt)s32));
}
break;
}
case Iop_Sar64: {
/* paranoid ... */
/*signed*/ Long s64;
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
if (shift >= 0 && shift <= 63) {
s64 >>=/*signed*/ shift;
e2 = IRExpr_Const(IRConst_U64((ULong)s64));
}
break;
}
/* -- Shr -- */
case Iop_Shr32: {
/* paranoid ... */
/*unsigned*/ UInt u32;
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
if (shift >= 0 && shift <= 31) {
u32 >>=/*unsigned*/ shift;
e2 = IRExpr_Const(IRConst_U32(u32));
}
break;
}
case Iop_Shr64: {
/* paranoid ... */
/*unsigned*/ ULong u64;
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
if (shift >= 0 && shift <= 63) {
u64 >>=/*unsigned*/ shift;
e2 = IRExpr_Const(IRConst_U64(u64));
}
break;
}
/* -- CmpEQ -- */
case Iop_CmpEQ32:
e2 = IRExpr_Const(IRConst_U1(toBool(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
== e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
break;
case Iop_CmpEQ64:
e2 = IRExpr_Const(IRConst_U1(toBool(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
== e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
break;
/* -- CmpNE -- */
case Iop_CmpNE8:
case Iop_CasCmpNE8:
case Iop_ExpCmpNE8:
e2 = IRExpr_Const(IRConst_U1(toBool(
((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
!= (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
break;
case Iop_CmpNE32:
case Iop_CasCmpNE32:
case Iop_ExpCmpNE32:
e2 = IRExpr_Const(IRConst_U1(toBool(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
!= e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
break;
case Iop_CmpNE64:
case Iop_CasCmpNE64:
case Iop_ExpCmpNE64:
e2 = IRExpr_Const(IRConst_U1(toBool(
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
!= e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
break;
/* -- CmpLEU -- */
case Iop_CmpLE32U:
e2 = IRExpr_Const(IRConst_U1(toBool(
((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
<= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
break;
case Iop_CmpLE64U:
e2 = IRExpr_Const(IRConst_U1(toBool(
((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
<= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
break;
/* -- CmpLES -- */
case Iop_CmpLE32S:
e2 = IRExpr_Const(IRConst_U1(toBool(
((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
<= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
break;
case Iop_CmpLE64S:
e2 = IRExpr_Const(IRConst_U1(toBool(
((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
<= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
break;
/* -- CmpLTS -- */
case Iop_CmpLT32S:
e2 = IRExpr_Const(IRConst_U1(toBool(
((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
< (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
break;
case Iop_CmpLT64S:
e2 = IRExpr_Const(IRConst_U1(toBool(
((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
< (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
break;
/* -- CmpLTU -- */
case Iop_CmpLT32U:
e2 = IRExpr_Const(IRConst_U1(toBool(
((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
< (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
break;
case Iop_CmpLT64U:
e2 = IRExpr_Const(IRConst_U1(toBool(
((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
< (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
break;
/* -- CmpORD -- */
case Iop_CmpORD32S: {
/* very paranoid */
UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
Int s32a = (Int)u32a;
Int s32b = (Int)u32b;
Int r = 0x2; /* EQ */
if (s32a < s32b) {
r = 0x8; /* LT */
}
else if (s32a > s32b) {
r = 0x4; /* GT */
}
e2 = IRExpr_Const(IRConst_U32(r));
break;
}
/* -- nHLto2n -- */
case Iop_32HLto64:
e2 = IRExpr_Const(IRConst_U64(
(((ULong)(e->Iex.Binop.arg1
->Iex.Const.con->Ico.U32)) << 32)
| ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
));
break;
case Iop_64HLto128:
/* We can't fold this, because there is no way to
express the result in IR, but at least pretend to
handle it, so as to stop getting blasted with
no-rule-for-this-primop messages. */
break;
/* For this vector one, can't fold all cases, but at
least do the most obvious one. Could do better here
using summarise/desummarise of vector constants, but
too difficult to verify; hence just handle the zero
cases. */
case Iop_64HLtoV128: {
ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
if (0 == argHi && 0 == argLo) {
e2 = IRExpr_Const(IRConst_V128(0));
} else {
goto unhandled;
}
break;
}
/* Same reasoning for the 256-bit version. */
case Iop_V128HLtoV256: {
IRExpr* argHi = e->Iex.Binop.arg1;
IRExpr* argLo = e->Iex.Binop.arg2;
if (isZeroV128(argHi) && isZeroV128(argLo)) {
e2 = IRExpr_Const(IRConst_V256(0));
} else {
goto unhandled;
}
break;
}
/* -- V128 stuff -- */
case Iop_InterleaveLO8x16: {
/* This turns up a lot in Memcheck instrumentation of
Icc generated code. I don't know why. */
UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
if (0 == arg1 && 0 == arg2) {
e2 = IRExpr_Const(IRConst_V128(0));
} else {
goto unhandled;
}
break;
}
default:
goto unhandled;
}
} else {
/* other cases (identities, etc) */
switch (e->Iex.Binop.op) {
case Iop_Shl32:
case Iop_Shl64:
case Iop_Shr64:
/* Shl32/Shl64/Shr64(x,0) ==> x */
if (isZeroU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* Shl32/Shl64/Shr64(0,x) ==> 0 */
if (isZeroU(e->Iex.Binop.arg1)) {
e2 = e->Iex.Binop.arg1;
break;
}
break;
case Iop_Shr32:
/* Shr32(x,0) ==> x */
if (isZeroU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
break;
case Iop_Or8:
case Iop_Or16:
case Iop_Or32:
case Iop_Or64:
case Iop_Max32U:
/* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
if (isZeroU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
if (isZeroU(e->Iex.Binop.arg1)) {
e2 = e->Iex.Binop.arg2;
break;
}
/* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
/* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
break;
}
/* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
break;
case Iop_Add8:
/* Add8(t,t) ==> t << 1.
Memcheck doesn't understand that
x+x produces a defined least significant bit, and it seems
simplest just to get rid of the problem by rewriting it
out, since the opportunity to do so exists. */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
IRExpr_Const(IRConst_U8(1)));
break;
}
break;
/* NB no Add16(t,t) case yet as no known test case exists */
case Iop_Add32:
case Iop_Add64:
/* Add32/Add64(x,0) ==> x */
if (isZeroU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* Add32/Add64(0,x) ==> x */
if (isZeroU(e->Iex.Binop.arg1)) {
e2 = e->Iex.Binop.arg2;
break;
}
/* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = IRExpr_Binop(
e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
break;
}
break;
case Iop_Sub32:
case Iop_Sub64:
/* Sub32/Sub64(x,0) ==> x */
if (isZeroU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
break;
case Iop_Sub8x16:
/* Sub8x16(x,0) ==> x */
if (isZeroV128(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
break;
case Iop_And8:
case Iop_And16:
case Iop_And32:
case Iop_And64:
/* And8/And16/And32/And64(x,1---1b) ==> x */
if (isOnesU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* And8/And16/And32/And64(1---1b,x) ==> x */
if (isOnesU(e->Iex.Binop.arg1)) {
e2 = e->Iex.Binop.arg2;
break;
}
/* And8/And16/And32/And64(x,0) ==> 0 */
if (isZeroU(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg2;
break;
}
/* And8/And16/And32/And64(0,x) ==> 0 */
if (isZeroU(e->Iex.Binop.arg1)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
break;
case Iop_AndV128:
case Iop_AndV256:
/* AndV128/AndV256(t,t) ==> t, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* Deal with either arg zero. Could handle other And
cases here too. */
if (e->Iex.Binop.op == Iop_AndV256
&& (isZeroV256(e->Iex.Binop.arg1)
|| isZeroV256(e->Iex.Binop.arg2))) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
} else if (e->Iex.Binop.op == Iop_AndV128
&& (isZeroV128(e->Iex.Binop.arg1)
|| isZeroV128(e->Iex.Binop.arg2))) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
break;
case Iop_OrV128:
case Iop_OrV256:
/* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
/* OrV128(t,0) ==> t */
if (e->Iex.Binop.op == Iop_OrV128) {
if (isZeroV128(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
if (isZeroV128(e->Iex.Binop.arg1)) {
e2 = e->Iex.Binop.arg2;
break;
}
}
/* OrV256(t,0) ==> t */
if (e->Iex.Binop.op == Iop_OrV256) {
if (isZeroV256(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
//Disabled because there's no known test case right now.
//if (isZeroV256(e->Iex.Binop.arg1)) {
// e2 = e->Iex.Binop.arg2;
// break;
//}
}
break;
case Iop_Xor8:
case Iop_Xor16:
case Iop_Xor32:
case Iop_Xor64:
case Iop_XorV128:
/* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
/* XorV128(t,0) ==> t */
if (e->Iex.Binop.op == Iop_XorV128) {
if (isZeroV128(e->Iex.Binop.arg2)) {
e2 = e->Iex.Binop.arg1;
break;
}
//Disabled because there's no known test case right now.
//if (isZeroV128(e->Iex.Binop.arg1)) {
// e2 = e->Iex.Binop.arg2;
// break;
//}
}
break;
case Iop_CmpNE32:
/* CmpNE32(t,t) ==> 0, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
}
/* CmpNE32(1Uto32(b), 0) ==> b */
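/* Illustrative example (not in the original source): if arg1 is a
temp which the env binds to 1Uto32(t3), chase() recovers that
unop, and CmpNE32(arg1, 0x0:I32) folds to just t3, which is
already an Ity_I1 value. */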
if (isZeroU32(e->Iex.Binop.arg2)) {
IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
if (a1 && a1->tag == Iex_Unop
&& a1->Iex.Unop.op == Iop_1Uto32) {
e2 = a1->Iex.Unop.arg;
break;
}
}
break;
case Iop_CmpEQ32:
case Iop_CmpEQ64:
case Iop_CmpEQ8x8:
case Iop_CmpEQ8x16:
case Iop_CmpEQ16x8:
case Iop_CmpEQ32x4:
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
break;
}
break;
default:
break;
}
}
break;
case Iex_ITE:
/* ITE */
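/* Illustrative examples (not from the original source):
ITE(1:I1, t3, t4) folds to t3, and ITE(cond, t3, t3) folds to
t3 regardless of cond. */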
/* is the discriminant a constant? */
if (e->Iex.ITE.cond->tag == Iex_Const) {
/* this is assured by the IR type rules */
vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
}
else
/* are the arms identical? (pretty weedy test) */
if (sameIRExprs(env, e->Iex.ITE.iftrue,
e->Iex.ITE.iffalse)) {
e2 = e->Iex.ITE.iffalse;
}
break;
default:
/* not considered */
break;
}
/* Show cases where we've found but not folded 'op(t,t)'. Be
careful not to call sameIRExprs with values of different types,
though, else it will assert (and so it should!). We can't
conveniently call typeOfIRExpr on the two args without a whole
bunch of extra plumbing to pass in a type env, so just use a
hacky test to check that the arguments are not anything that might
cause sameIRExprs to assert.  This is only OK because this kludge is
only used for debug printing, not for "real" operation.  For
"real" operation (ie, all other calls to sameIRExprs), it is
essential that the two args have the same type.
The "right" solution is to plumb the containing block's
IRTypeEnv through to here and use typeOfIRExpr to be sure. But
that's a bunch of extra parameter passing which will just slow
down the normal case, for no purpose. */
if (vex_control.iropt_verbosity > 0
&& e == e2
&& e->tag == Iex_Binop
&& !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
e->Iex.Binop.arg2)
&& sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
vex_printf("vex iropt: fold_Expr: no ident rule for: ");
ppIRExpr(e);
vex_printf("\n");
}
/* Show the overall results of folding. */
if (DEBUG_IROPT && e2 != e) {
vex_printf("FOLD: ");
ppIRExpr(e); vex_printf(" -> ");
ppIRExpr(e2); vex_printf("\n");
}
return e2;
unhandled:
# if 0
vex_printf("\n\n");
ppIRExpr(e);
vpanic("fold_Expr: no rule for the above");
# else
if (vex_control.iropt_verbosity > 0) {
vex_printf("vex iropt: fold_Expr: no const rule for: ");
ppIRExpr(e);
vex_printf("\n");
}
return e2;
# endif
}
/* Apply the subst to a simple 1-level expression -- guaranteed to be
1-level due to previous flattening pass. */
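/* A hedged illustration of the intent (assumed example, not from the
original source): with env[t1] bound to the constant 0x5:I32 and t2
unbound, the flat expression Add32(t1,t2) is rewritten below as
Add32(0x5:I32, t2); t2 is left alone since it has no binding. */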
static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
switch (ex->tag) {
case Iex_RdTmp:
if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
if (rhs->tag == Iex_RdTmp)
return rhs;
if (rhs->tag == Iex_Const
&& rhs->Iex.Const.con->tag != Ico_F64i)
return rhs;
}
/* not bound in env */
return ex;
case Iex_Const:
case Iex_Get:
return ex;
case Iex_GetI:
vassert(isIRAtom(ex->Iex.GetI.ix));
return IRExpr_GetI(
ex->Iex.GetI.descr,
subst_Expr(env, ex->Iex.GetI.ix),
ex->Iex.GetI.bias
);
case Iex_Qop: {
IRQop* qop = ex->Iex.Qop.details;
vassert(isIRAtom(qop->arg1));
vassert(isIRAtom(qop->arg2));
vassert(isIRAtom(qop->arg3));
vassert(isIRAtom(qop->arg4));
return IRExpr_Qop(
qop->op,
subst_Expr(env, qop->arg1),
subst_Expr(env, qop->arg2),
subst_Expr(env, qop->arg3),
subst_Expr(env, qop->arg4)
);
}
case Iex_Triop: {
IRTriop* triop = ex->Iex.Triop.details;
vassert(isIRAtom(triop->arg1));
vassert(isIRAtom(triop->arg2));
vassert(isIRAtom(triop->arg3));
return IRExpr_Triop(
triop->op,
subst_Expr(env, triop->arg1),
subst_Expr(env, triop->arg2),
subst_Expr(env, triop->arg3)
);
}
case Iex_Binop:
vassert(isIRAtom(ex->Iex.Binop.arg1));
vassert(isIRAtom(ex->Iex.Binop.arg2));
return IRExpr_Binop(
ex->Iex.Binop.op,
subst_Expr(env, ex->Iex.Binop.arg1),
subst_Expr(env, ex->Iex.Binop.arg2)
);
case Iex_Unop:
vassert(isIRAtom(ex->Iex.Unop.arg));
return IRExpr_Unop(
ex->Iex.Unop.op,
subst_Expr(env, ex->Iex.Unop.arg)
);
case