blob: 16152ba67b09a9af36bd26a6ea618230f717166e [file] [log] [blame]
/*---------------------------------------------------------------*/
/*--- begin host_x86_isel.c ---*/
/*---------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2004-2013 OpenWorks LLP
info@open-works.net
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"
/* TODO 21 Apr 2005:
-- (Really an assembler issue) don't emit CMov32 as a cmov
insn, since that's expensive on P4 and conditional branch
is cheaper if (as we expect) the condition is highly predictable
-- preserve xmm registers across function calls (by declaring them
as trashed by call insns)
-- preserve x87 ST stack discipline across function calls. Sigh.
-- Check doHelperCall: if a call is conditional, we cannot safely
compute any regparm args directly to registers. Hence, the
fast-regparm marshalling should be restricted to unconditional
calls only.
*/
/*---------------------------------------------------------*/
/*--- x87 control word stuff ---*/
/*---------------------------------------------------------*/
/* Vex-generated code expects to run with the FPU set as follows: all
exceptions masked, round-to-nearest, precision = 53 bits. This
corresponds to a FPU control word value of 0x027F.
Similarly the SSE control word (%mxcsr) should be 0x1F80.
%fpucw and %mxcsr should have these values on entry to
Vex-generated code, and those values should be
unchanged at exit.
*/
#define DEFAULT_FPUCW 0x027F
/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
/*---------------------------------------------------------*/
/*--- misc helpers ---*/
/*---------------------------------------------------------*/
/* These are duplicated in guest-x86/toIR.c */
/* Shorthand constructor: wrap |a| in a unary-operator IR node for |op|. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   IRExpr* node = IRExpr_Unop(op, a);
   return node;
}
/* Shorthand constructor: build a binary-operator IR node for |op|
   with |a1| as the first operand and |a2| as the second. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   IRExpr* node = IRExpr_Binop(op, a1, a2);
   return node;
}
/* Shorthand constructor: build a binder node numbered |binder|, as
   used when writing match patterns for matchIRExpr. */
static IRExpr* bind ( Int binder )
{
   IRExpr* node = IRExpr_Binder(binder);
   return node;
}
/* Return true iff |e| is a constant expression of kind Ico_U8 whose
   value is zero.  Each test is only evaluated if the previous one
   held, so the constant payload is never read for non-constants. */
static Bool isZeroU8 ( IRExpr* e )
{
   Bool isConst = e->tag == Iex_Const;
   Bool isU8    = isConst && e->Iex.Const.con->tag == Ico_U8;
   return isU8 && e->Iex.Const.con->Ico.U8 == 0;
}
/* Return true iff |e| is a constant expression of kind Ico_U32 whose
   value is zero.  Each test is only evaluated if the previous one
   held, so the constant payload is never read for non-constants. */
static Bool isZeroU32 ( IRExpr* e )
{
   Bool isConst = e->tag == Iex_Const;
   Bool isU32   = isConst && e->Iex.Const.con->tag == Ico_U32;
   return isU32 && e->Iex.Const.con->Ico.U32 == 0;
}
//static Bool isZeroU64 ( IRExpr* e )
//{
// return e->tag == Iex_Const
// && e->Iex.Const.con->tag == Ico_U64
// && e->Iex.Const.con->Ico.U64 == 0ULL;
//}
/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/
/* This carries around:
- A mapping from IRTemp to IRType, giving the type of any IRTemp we
might encounter. This is computed before insn selection starts,
and does not change.
- A mapping from IRTemp to HReg. This tells the insn selector
which virtual register(s) are associated with each IRTemp
temporary. This is computed before insn selection starts, and
does not change. We expect this mapping to map precisely the
same set of IRTemps as the type mapping does.
- vregmap holds the primary register for the IRTemp.
- vregmapHI is only used for 64-bit integer-typed
IRTemps. It holds the identity of a second
32-bit virtual HReg, which holds the high half
of the value.
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
- The host subarchitecture we are selecting insns for.
This is set at the start and does not change.
- A Bool for indicating whether we may generate chain-me
instructions for control flow transfers, or whether we must use
XAssisted.
- The maximum guest address of any guest insn in this block.
Actually, the address of the highest-addressed byte from any insn
in this block. Is set at the start and does not change. This is
used for detecting jumps which are definitely forward-edges from
this block, and therefore can be made (chained) to the fast entry
point of the destination, thereby avoiding the destination's
event check.
Note, this is all (well, mostly) host-independent.
*/
/* Instruction-selection environment, passed to every isel routine in
   this file. */
typedef
struct {
/* Constant -- are set at the start and do not change. */
/* IRTemp -> IRType mapping for the temporaries we may encounter. */
IRTypeEnv* type_env;
/* IRTemp -> primary virtual register. */
HReg* vregmap;
/* IRTemp -> second 32-bit vreg holding the high half of the value;
   only used for 64-bit integer-typed IRTemps. */
HReg* vregmapHI;
/* Upper bound used to range-check IRTemps before indexing the two
   maps above. */
Int n_vregmap;
/* The host subarchitecture we are selecting insns for. */
UInt hwcaps;
/* Whether chain-me instructions may be generated for control flow
   transfers, or whether XAssisted must be used. */
Bool chainingAllowed;
/* Address of the highest-addressed byte of any guest insn in this
   block. */
Addr64 max_ga;
/* These are modified as we go along. */
/* The insns selected so far. */
HInstrArray* code;
/* Counter used to number newly created virtual registers. */
Int vreg_ctr;
}
ISelEnv;
/* Return the primary virtual register associated with |tmp|.  The
   temp is range-checked against the size of the map first. */
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   HReg* map;
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   map = env->vregmap;
   return map[tmp];
}
/* Fetch the register pair associated with the 64-bit temp |tmp|:
   the low half is written to *vrLO and the high half to *vrHI.
   Asserts that |tmp| is in range and that it actually has a
   high-half register assigned. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));

   /* Low half first, then high half. */
   vrLO[0] = env->vregmap[tmp];
   vrHI[0] = env->vregmapHI[tmp];
}
/* Append |instr| to the code being built in |env|.  When vcode
   tracing is enabled, also print the instruction followed by a
   newline. */
static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);

   if ((vex_traceflags & VEX_TRACE_VCODE) == 0)
      return;

   ppX86Instr(instr, False);
   vex_printf("\n");
}
/* Allocate a fresh virtual register of class HRcInt32, numbered with
   the current value of the environment's vreg counter, and advance
   the counter. */
static HReg newVRegI ( ISelEnv* env )
{
   Int ix = env->vreg_ctr;
   env->vreg_ctr = ix + 1;
   return mkHReg(ix, HRcInt32, True/*virtual reg*/);
}
/* Allocate a fresh virtual register of class HRcFlt64, numbered with
   the current value of the environment's vreg counter, and advance
   the counter. */
static HReg newVRegF ( ISelEnv* env )
{
   Int ix = env->vreg_ctr;
   env->vreg_ctr = ix + 1;
   return mkHReg(ix, HRcFlt64, True/*virtual reg*/);
}
/* Allocate a fresh virtual register of class HRcVec128, numbered
   with the current value of the environment's vreg counter, and
   advance the counter. */
static HReg newVRegV ( ISelEnv* env )
{
   Int ix = env->vreg_ctr;
   env->vreg_ctr = ix + 1;
   return mkHReg(ix, HRcVec128, True/*virtual reg*/);
}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations ---*/
/*---------------------------------------------------------*/
/* These are organised as iselXXX and iselXXX_wrk pairs. The
iselXXX_wrk do the real work, but are not to be called directly.
For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
checks that all returned registers are virtual. You should not
call the _wrk version directly.
*/
/* Integer expressions.  The variants differ in the operand form
   they hand back: an X86RMI, an X86RI, an X86RM, a plain register,
   or an address mode. */
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
/* 64-bit integer expressions: the result comes back as a pair of
   registers, written to *rHi and *rLo. */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
static void iselInt64Expr ( HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
/* Expressions evaluated to an x86 condition code. */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
/* Remaining selectors, each returning a single register.
   NOTE(review): judging by the names these presumably handle F64,
   F32 and V128 typed expressions respectively -- confirm against
   their definitions. */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers ---*/
/*---------------------------------------------------------*/
/* Make an int reg-reg move. */
/* Build (but do not emit) a 32-bit integer move from |src| to |dst|.
   Both registers must be of class HRcInt32. */
static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   X86Instr* mov;
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   mov = X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
   return mov;
}
/* Make a vector reg-reg move. */
/* Build (but do not emit) a vector move from |src| to |dst|.  Both
   registers must be of class HRcVec128. */
static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   X86Instr* mov;
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   mov = X86Instr_SseReRg(Xsse_MOV, src, dst);
   return mov;
}
/* Advance/retreat %esp by n. */
/* Emit "addl $n, %esp".  |n| must be a positive multiple of 4 that
   is smaller than 256. */
static void add_to_esp ( ISelEnv* env, Int n )
{
   X86Instr* adj;
   vassert(n > 0 && n < 256 && (n%4) == 0);
   adj = X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP());
   addInstr(env, adj);
}
/* Emit "subl $n, %esp".  |n| must be a positive multiple of 4 that
   is smaller than 256. */
static void sub_from_esp ( ISelEnv* env, Int n )
{
   X86Instr* adj;
   vassert(n > 0 && n < 256 && (n%4) == 0);
   adj = X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP());
   addInstr(env, adj);
}
/* Given an amode, return one which references 4 bytes further
along. */
/* Return a copy of |am| whose immediate displacement is 4 larger,
   i.e. an amode addressing the next 32-bit word.  The input amode
   is left untouched.  Panics on any amode form other than Xam_IRRS
   or Xam_IR. */
static X86AMode* advance4 ( X86AMode* am )
{
   X86AMode* bumped = dopyX86AMode(am);

   if (bumped->tag == Xam_IRRS) {
      bumped->Xam.IRRS.imm += 4;
   } else if (bumped->tag == Xam_IR) {
      bumped->Xam.IR.imm += 4;
   } else {
      vpanic("advance4(x86,host)");
   }

   return bumped;
}
/* Push an arg onto the host stack, in preparation for a call to a
helper function of some kind. Returns the number of 32-bit words
pushed. If we encounter an IRExpr_VECRET() then we expect that
r_vecRetAddr will be a valid register, that holds the relevant
address.
*/
/* Emit code that pushes one helper-call argument onto the host
   stack and return how many 32-bit words were pushed (1 or 2).

   - IRExpr_VECRET(): pushes |r_vecRetAddr|, which must be valid.
     (Currently guarded by an "awaiting test case" assertion.)
   - IRExpr_BBPTR(): pushes %ebp.
   - I32 expression: pushes its value.
   - I64 expression: pushes the high half and then the low half.

   Any other argument type is printed and causes a panic. */
static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
{
   /* The two special marker nodes are handled before asking for the
      expression's type. */
   if (UNLIKELY(arg->tag == Iex_VECRET)) {
      vassert(0); //ATC
      vassert(!hregIsInvalid(r_vecRetAddr));
      addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
      return 1;
   }
   if (UNLIKELY(arg->tag == Iex_BBPTR)) {
      addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
      return 1;
   }

   /* Ordinary expression: dispatch on its type. */
   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
   switch (arg_ty) {
      case Ity_I32: {
         addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
         return 1;
      }
      case Ity_I64: {
         HReg rHi, rLo;
         iselInt64Expr(&rHi, &rLo, env, arg);
         /* High word goes on first, then the low word. */
         addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
         addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
         return 2;
      }
      default:
         break;
   }

   ppIRExpr(arg);
   vpanic("pushArg(x86): can't handle arg of this type");
}
/* Complete the call to a helper function, by calling the
helper and clearing the args off the stack. */
/* Emit the call instruction for |cee|, executed under condition
   |cc| and with return location |rloc|, then emit code to remove
   |n_arg_ws| 32-bit argument words from the stack (if any were
   pushed). */
static
void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
                              IRCallee* cee, Int n_arg_ws,
                              RetLoc rloc )
{
   /* The callee's address is narrowed to 32 bits below, so this only
      makes sense when host pointers are 4 bytes wide. */
   vassert(sizeof(void*) == 4);

   addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
                                cee->regparms, rloc));

   /* Nothing was pushed: no stack cleanup required. */
   if (n_arg_ws <= 0)
      return;

   add_to_esp(env, 4*n_arg_ws);
}
/* Used only in doHelperCall. See big comment in doHelperCall re
handling of regparm args. This function figures out whether
evaluation of an expression might require use of a fixed register.
If in doubt return True (safe but suboptimal).
*/
/* Conservative test: could evaluating |e| need a fixed (real)
   register?  Returns False only for the forms known to be harmless
   -- the VECRET/BBPTR markers, temporaries, constants and guest
   state reads -- and True for everything else. */
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   /* The markers are always "safe": they become a copy of %esp held
      in some arbitrary vreg, or a copy of %ebp, respectively. */
   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e)))
      return False;

   /* Ordinary expressions: only the three simplest forms are
      treated as safe. */
   if (e->tag == Iex_RdTmp || e->tag == Iex_Const || e->tag == Iex_Get)
      return False;

   return True;
}
/* Do a complete function call. |guard| is an Ity_I1 expression
indicating whether or not the call happens. If guard==NULL, the
call is unconditional. |retloc| is set to indicate where the
return value is after the call. The caller (of this fn) must
generate code to add |stackAdjustAfterCall| to the stack pointer
after the call is done. */
/* doHelperCall: emit x86 code for a (possibly guarded) call to the
   helper described by |cee|.

   stackAdjustAfterCall (OUT): number of bytes the caller must add to
      %esp after the call; 0 unless the return type is a vector.
   retloc (OUT): where the return value is found, derived from
      |retTy|.
   env:   selection environment receiving the generated insns.
   guard: NULL or the constant 1 gives an unconditional call; any
      other expression is evaluated to a condition code.
   cee:   the callee; cee->regparms (0 .. 3) gives how many leading
      args travel in registers rather than on the stack.
   retTy: the return type, Ity_INVALID meaning "no value returned".
   args:  NULL-terminated vector of argument expressions.
*/
static
void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
/*OUT*/RetLoc* retloc,
ISelEnv* env,
IRExpr* guard,
IRCallee* cee, IRType retTy, IRExpr** args )
{
X86CondCode cc;
HReg argregs[3];
HReg tmpregs[3];
Bool danger;
Int not_done_yet, n_args, n_arg_ws, stack_limit,
i, argreg, argregX;
/* Set default returns. We'll update them later if needed. */
*stackAdjustAfterCall = 0;
*retloc = mk_RetLoc_INVALID();
/* These are used for cross-checking that IR-level constraints on
the use of Iex_VECRET and Iex_BBPTR are observed. */
UInt nVECRETs = 0;
UInt nBBPTRs = 0;
/* Marshal args for a call, do the call, and clear the stack.
Complexities to consider:
* The return type can be I{64,32,16,8} or V128. In the V128
case, it is expected that |args| will contain the special
node IRExpr_VECRET(), in which case this routine generates
code to allocate space on the stack for the vector return
value. Since we are not passing any scalars on the stack, it
is enough to preallocate the return space before marshalling
any arguments, in this case.
|args| may also contain IRExpr_BBPTR(), in which case the
value in %ebp is passed as the corresponding argument.
* If the callee claims regparmness of 1, 2 or 3, we must pass the
first 1, 2 or 3 args in registers (EAX, EDX, and ECX
respectively). To keep things relatively simple, only args of
type I32 may be passed as regparms -- just bomb out if anything
else turns up. Clearly this depends on the front ends not
trying to pass any other types as regparms.
*/
/* 16 Nov 2004: the regparm handling is complicated by the
following problem.
Consider a call to a function with two regparm parameters:
f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
Suppose code is first generated to compute e1 into %eax. Then,
code is generated to compute e2 into %edx. Unfortunately, if
the latter code sequence uses %eax, it will trash the value of
e1 computed by the former sequence. This could happen if (for
example) e2 itself involved a function call. In the code below,
args are evaluated right-to-left, not left-to-right, but the
principle and the problem are the same.
One solution is to compute all regparm-bound args into vregs
first, and once they are all done, move them to the relevant
real regs. This always gives correct code, but it also gives
a bunch of vreg-to-rreg moves which are usually redundant but
are hard for the register allocator to get rid of.
A compromise is to first examine all regparm'd argument
expressions. If they are all so simple that it is clear
they will be evaluated without use of any fixed registers,
use the old compute-directly-to-fixed-target scheme. If not,
be safe and use the via-vregs scheme.
Note this requires being able to examine an expression and
determine whether or not evaluation of it might use a fixed
register. That requires knowledge of how the rest of this
insn selector works. Currently just the following 3 are
regarded as safe -- hopefully they cover the majority of
arguments in practice: IRExpr_RdTmp IRExpr_Const IRExpr_Get.
*/
vassert(cee->regparms >= 0 && cee->regparms <= 3);
/* Count the number of args and also the VECRETs */
n_args = n_arg_ws = 0;
while (args[n_args]) {
IRExpr* arg = args[n_args];
n_args++;
if (UNLIKELY(arg->tag == Iex_VECRET)) {
nVECRETs++;
} else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
nBBPTRs++;
}
}
/* If this fails, the IR is ill-formed */
vassert(nBBPTRs == 0 || nBBPTRs == 1);
/* If we have a VECRET, allocate space on the stack for the return
value, and record the stack pointer after that. */
HReg r_vecRetAddr = INVALID_HREG;
if (nVECRETs == 1) {
vassert(retTy == Ity_V128 || retTy == Ity_V256);
vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
r_vecRetAddr = newVRegI(env);
sub_from_esp(env, 16);
addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
} else {
// If either of these fail, the IR is ill-formed
vassert(retTy != Ity_V128 && retTy != Ity_V256);
vassert(nVECRETs == 0);
}
/* not_done_yet counts down as each arg is marshalled; it must reach
zero by the end.  Args with index >= stack_limit go on the stack,
the rest go in registers. */
not_done_yet = n_args;
stack_limit = cee->regparms;
/* ------ BEGIN marshall all arguments ------ */
/* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
for (i = n_args-1; i >= stack_limit; i--) {
n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
not_done_yet--;
}
/* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
registers. */
if (cee->regparms > 0) {
/* ------ BEGIN deal with regparms ------ */
/* deal with regparms, not forgetting %ebp if needed. */
argregs[0] = hregX86_EAX();
argregs[1] = hregX86_EDX();
argregs[2] = hregX86_ECX();
tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
argreg = cee->regparms;
/* In keeping with big comment above, detect potential danger
and use the via-vregs scheme if needed. */
danger = False;
for (i = stack_limit-1; i >= 0; i--) {
if (mightRequireFixedRegs(args[i])) {
danger = True;
break;
}
}
if (danger) {
/* Move via temporaries */
argregX = argreg;
for (i = stack_limit-1; i >= 0; i--) {
if (0) {
vex_printf("x86 host: register param is complex: ");
ppIRExpr(args[i]);
vex_printf("\n");
}
IRExpr* arg = args[i];
argreg--;
vassert(argreg >= 0);
if (UNLIKELY(arg->tag == Iex_VECRET)) {
vassert(0); //ATC
}
else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
vassert(0); //ATC
} else {
vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
tmpregs[argreg] = iselIntExpr_R(env, arg);
}
not_done_yet--;
}
/* Every regparm arg now sits in a vreg; copy each one into its
real argument register. */
for (i = stack_limit-1; i >= 0; i--) {
argregX--;
vassert(argregX >= 0);
addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
}
} else {
/* It's safe to compute all regparm args directly into their
target registers. */
for (i = stack_limit-1; i >= 0; i--) {
IRExpr* arg = args[i];
argreg--;
vassert(argreg >= 0);
if (UNLIKELY(arg->tag == Iex_VECRET)) {
vassert(!hregIsInvalid(r_vecRetAddr));
addInstr(env, X86Instr_Alu32R(Xalu_MOV,
X86RMI_Reg(r_vecRetAddr),
argregs[argreg]));
}
else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
vassert(0); //ATC
} else {
vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
addInstr(env, X86Instr_Alu32R(Xalu_MOV,
iselIntExpr_RMI(env, arg),
argregs[argreg]));
}
not_done_yet--;
}
}
/* ------ END deal with regparms ------ */
}
vassert(not_done_yet == 0);
/* ------ END marshall all arguments ------ */
/* Now we can compute the condition. We can't do it earlier
because the argument computations could trash the condition
codes. Be a bit clever to handle the common case where the
guard is 1:Bit. */
cc = Xcc_ALWAYS;
if (guard) {
if (guard->tag == Iex_Const
&& guard->Iex.Const.con->tag == Ico_U1
&& guard->Iex.Const.con->Ico.U1 == True) {
/* unconditional -- do nothing */
} else {
cc = iselCondCode( env, guard );
}
}
/* Do final checks, set the return values, and generate the call
instruction proper. */
vassert(*stackAdjustAfterCall == 0);
vassert(is_RetLoc_INVALID(*retloc));
switch (retTy) {
case Ity_INVALID:
/* Function doesn't return a value. */
*retloc = mk_RetLoc_simple(RLPri_None);
break;
case Ity_I64:
*retloc = mk_RetLoc_simple(RLPri_2Int);
break;
case Ity_I32: case Ity_I16: case Ity_I8:
*retloc = mk_RetLoc_simple(RLPri_Int);
break;
case Ity_V128:
/* The result is in the 16 bytes reserved on the stack before
marshalling; the caller must release them afterwards. */
*retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
*stackAdjustAfterCall = 16;
break;
case Ity_V256:
vassert(0); // ATC
*retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
*stackAdjustAfterCall = 32;
break;
default:
/* IR can denote other possible return types, but we don't
handle those here. */
vassert(0);
}
/* Finally, generate the call itself. This needs the *retloc value
set in the switch above, which is why it's at the end. */
callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}
/* Given a guest-state array descriptor, an index expression and a
bias, generate an X86AMode holding the relevant guest state
offset. */
/* Build an address mode for element (off + bias) of the guest-state
   array described by |descr|, with the index wrapped modulo 8.

   Only the shapes an x86 front end produces are accepted: exactly 8
   elements, each of size 1, 4 or 8 bytes.  Anything else panics. */
static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   Int elemSz = sizeofIRType(descr->elemTy);
   Int shift  = 0;

   /* Reject arrays that no x86 guest generates.  Supporting them
      would only matter for a non-x86 guest on an x86 host. */
   if (descr->nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   /* Element size -> log2 scale for the indexed address mode. */
   if (elemSz == 1) {
      shift = 0;
   } else if (elemSz == 4) {
      shift = 2;
   } else if (elemSz == 8) {
      shift = 3;
   } else {
      vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Emit:
         movl %off, %tmp
         addl $bias, %tmp     (only when bias != 0)
         andl $7, %tmp
      and hand back  base(%ebp, %tmp, shift). */
   HReg tmp  = newVRegI(env);
   HReg roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));

   if (bias != 0)
      addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));

   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));

   return X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
}
/* Mess with the FPU's rounding mode: set to the default rounding mode
(DEFAULT_FPUCW). */
/* Emit code that loads DEFAULT_FPUCW into the x87 control word,
   going via a temporary stack slot:

      pushl $DEFAULT_FPUCW
      fldcw 0(%esp)
      addl  $4, %esp
*/
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   X86AMode* slot = X86AMode_IR(0, hregX86_ESP());

   /* Put the control word on the stack ... */
   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
   /* ... load it from there ... */
   addInstr(env, X86Instr_FpLdCW(slot));
   /* ... and release the slot again. */
   add_to_esp(env, 4);
}
/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
expression denoting a value in the range 0 .. 3, indicating a round
mode encoded as per type IRRoundingMode. Set the x87 FPU to have
the same rounding.
*/
/* Emit code that sets the x87 rounding mode from |mode|, an
   I32-typed expression holding a value 0 .. 3 encoded as per
   IRRoundingMode.  The generated sequence is:

      movl  %mode, %cw
      andl  $3, %cw            -- shouldn't be needed; paranoia
      shll  $10, %cw
      orl   $DEFAULT_FPUCW, %cw
      pushl %cw
      fldcw 0(%esp)
      addl  $4, %esp
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg      modeReg = iselIntExpr_R(env, mode);
   HReg      cw      = newVRegI(env);
   X86AMode* slot    = X86AMode_IR(0, hregX86_ESP());

   /* Work on a copy: the register returned by the selector must not
      be modified. */
   addInstr(env, mk_iMOVsd_RR(modeReg, cw));

   /* Assemble the control word: mode bits shifted into place, merged
      with the default settings. */
   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), cw));
   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, cw));
   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), cw));

   /* Load it via a temporary stack slot. */
   addInstr(env, X86Instr_Push(X86RMI_Reg(cw)));
   addInstr(env, X86Instr_FpLdCW(slot));
   add_to_esp(env, 4);
}
/* Generate !src into a new vector register, and be sure that the code
is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
way to do this.
*/
/* Emit SSE1-compatible code computing the bitwise complement of
   |src| into a freshly allocated vector register, which is
   returned.  |src| itself is not modified. */
static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
{
   HReg inverted = newVRegV(env);

   /* Step 1: clear the register.  If it happened to hold a NaN, the
      self-comparison below would not behave as wanted. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, inverted, inverted));

   /* Step 2: compare it for equality with itself, giving all 1s. */
   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, inverted, inverted));

   /* Step 3: xor |src| into the all-1s value, which flips every
      bit of |src|. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, inverted));

   return inverted;
}
/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
after most non-simple FPU operations (simple = +, -, *, / and
sqrt).
This could be done a lot more efficiently if needed, by loading
zero and adding it to the value to be rounded (fldz ; faddp?).
*/
/* Emit code that forces the x87 value in |reg| through an 8-byte
   memory slot: reserve 8 bytes of stack, store |reg| there, load it
   straight back, and release the stack space. */
static void roundToF64 ( ISelEnv* env, HReg reg )
{
   X86AMode* slot = X86AMode_IR(0, hregX86_ESP());

   sub_from_esp(env, 8);
   /* Write the value out as an 8-byte quantity ... */
   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, slot));
   /* ... and read it back into the same register. */
   addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, slot));
   add_to_esp(env, 8);
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
/*---------------------------------------------------------*/
/* Select insns for an integer-typed expression, and add them to the
code list. Return a reg holding the result. This reg will be a
virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
want to modify it, ask for a new vreg, copy it in there, and modify
the copy. The register allocator will do its best to map both
vregs to the same real register, so the copies will often disappear
later in the game.
This should handle expressions of 32, 16 and 8-bit type. All
results are returned in a 32-bit register. For 16- and 8-bit
expressions, the upper 16/24 bits are arbitrary, so you should mask
or sign extend partial values if necessary.
*/
/* Checked entry point for selecting an integer expression into a
   register.  All the work is done by iselIntExpr_R_wrk; this wrapper
   then verifies that the register handed back is a virtual one of
   class HRcInt32 before returning it. */
static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r;

   r = iselIntExpr_R_wrk(env, e);

   /* Flip to 1 to print every expression passing through here. */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif

   /* Post-conditions on the worker's result. */
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));

   return r;
}
/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
MatchInfo mi;
IRType ty = typeOfIRExpr(env->type_env,e);
vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
switch (e->tag) {
/* --------- TEMP --------- */
case Iex_RdTmp: {
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
}
/* --------- LOAD --------- */
case Iex_Load: {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
/* We can't handle big-endian loads, nor load-linked. */
if (e->Iex.Load.end != Iend_LE)
goto irreducible;
if (ty == Ity_I32) {
addInstr(env, X86Instr_Alu32R(Xalu_MOV,
X86RMI_Mem(amode), dst) );
return dst;
}
if (ty == Ity_I16) {
addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
return dst;
}
if (ty == Ity_I8) {
addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
return dst;
}
break;
}
/* --------- TERNARY OP --------- */
case Iex_Triop: {
IRTriop *triop = e->Iex.Triop.details;
/* C3210 flags following FPU partial remainder (fprem), both
IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
if (triop->op == Iop_PRemC3210F64
|| triop->op == Iop_PRem1C3210F64) {
HReg junk = newVRegF(env);
HReg dst = newVRegI(env);
HReg srcL = iselDblExpr(env, triop->arg2);
HReg srcR = iselDblExpr(env, triop->arg3);
/* XXXROUNDINGFIXME */
/* set roundingmode here */
addInstr(env, X86Instr_FpBinary(
e->Iex.Binop.op==Iop_PRemC3210F64
? Xfp_PREM : Xfp_PREM1,
srcL,srcR,junk
));
/* The previous pseudo-insn will have left the FPU's C3210
flags set correctly. So bag them. */
addInstr(env, X86Instr_FpStSW_AX());
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
return dst;
}
break;
}
/* --------- BINARY OP --------- */
case Iex_Binop: {
X86AluOp aluOp;
X86ShiftOp shOp;
/* Pattern: Sub32(0,x) */
if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
HReg dst = newVRegI(env);
HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(reg,dst));
addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
return dst;
}
/* Is it an addition or logical style op? */
switch (e->Iex.Binop.op) {
case Iop_Add8: case Iop_Add16: case Iop_Add32:
aluOp = Xalu_ADD; break;
case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
aluOp = Xalu_SUB; break;
case Iop_And8: case Iop_And16: case Iop_And32:
aluOp = Xalu_AND; break;
case Iop_Or8: case Iop_Or16: case Iop_Or32:
aluOp = Xalu_OR; break;
case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
aluOp = Xalu_XOR; break;
case Iop_Mul16: case Iop_Mul32:
aluOp = Xalu_MUL; break;
default:
aluOp = Xalu_INVALID; break;
}
/* For commutative ops we assume any literal
values are on the second operand. */
if (aluOp != Xalu_INVALID) {
HReg dst = newVRegI(env);
HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(reg,dst));
addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
return dst;
}
/* Could do better here; forcing the first arg into a reg
isn't always clever.
-- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
movl 0xFFFFFFA0(%vr41),%vr107
movl 0xFFFFFFA4(%vr41),%vr108
movl %vr107,%vr106
xorl %vr108,%vr106
movl 0xFFFFFFA8(%vr41),%vr109
movl %vr106,%vr105
andl %vr109,%vr105
movl 0xFFFFFFA0(%vr41),%vr110
movl %vr105,%vr104
xorl %vr110,%vr104
movl %vr104,%vr70
*/
/* Perhaps a shift op? */
switch (e->Iex.Binop.op) {
case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
shOp = Xsh_SHL; break;
case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
shOp = Xsh_SHR; break;
case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
shOp = Xsh_SAR; break;
default:
shOp = Xsh_INVALID; break;
}
if (shOp != Xsh_INVALID) {
HReg dst = newVRegI(env);
/* regL = the value to be shifted */
HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(regL,dst));
/* Do any necessary widening for 16/8 bit operands */
switch (e->Iex.Binop.op) {
case Iop_Shr8:
addInstr(env, X86Instr_Alu32R(
Xalu_AND, X86RMI_Imm(0xFF), dst));
break;
case Iop_Shr16:
addInstr(env, X86Instr_Alu32R(
Xalu_AND, X86RMI_Imm(0xFFFF), dst));
break;
case Iop_Sar8:
addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
break;
case Iop_Sar16:
addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
break;
default: break;
}
/* Now consider the shift amount. If it's a literal, we
can do a much better job than the general case. */
if (e->Iex.Binop.arg2->tag == Iex_Const) {
/* assert that the IR is well-typed */
Int nshift;
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
vassert(nshift >= 0);
if (nshift > 0)
/* Can't allow nshift==0 since that means %cl */
addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
} else {
/* General case; we have to force the amount into %cl. */
HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
}
return dst;
}
/* Handle misc other ops. */
if (e->Iex.Binop.op == Iop_Max32U) {
HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg dst = newVRegI(env);
HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(src1,dst));
addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
return dst;
}
if (e->Iex.Binop.op == Iop_8HLto16) {
HReg hi8 = newVRegI(env);
HReg lo8 = newVRegI(env);
HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
return hi8;
}
if (e->Iex.Binop.op == Iop_16HLto32) {
HReg hi16 = newVRegI(env);
HReg lo16 = newVRegI(env);
HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
return hi16;
}
if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
|| e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
HReg a16 = newVRegI(env);
HReg b16 = newVRegI(env);
HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
Int shift = (e->Iex.Binop.op == Iop_MullS8
|| e->Iex.Binop.op == Iop_MullU8)
? 24 : 16;
X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
|| e->Iex.Binop.op == Iop_MullS16)
? Xsh_SAR : Xsh_SHR;
addInstr(env, mk_iMOVsd_RR(a16s, a16));
addInstr(env, mk_iMOVsd_RR(b16s, b16));
addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
return b16;
}
if (e->Iex.Binop.op == Iop_CmpF64) {
HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegI(env);
addInstr(env, X86Instr_FpCmp(fL,fR,dst));
/* shift this right 8 bits so as to conform to CmpF64
definition. */
addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
return dst;
}
if (e->Iex.Binop.op == Iop_F64toI32S
|| e->Iex.Binop.op == Iop_F64toI16S) {
Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegI(env);
/* Used several times ... */
X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
/* rf now holds the value to be converted, and rrm holds the
rounding mode value, encoded as per the IRRoundingMode
enum. The first thing to do is set the FPU's rounding
mode accordingly. */
/* Create a space for the format conversion. */
/* subl $4, %esp */
sub_from_esp(env, 4);
/* Set host rounding mode */
set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
/* gistw/l %rf, 0(%esp) */
addInstr(env, X86Instr_FpLdStI(False/*store*/,
toUChar(sz), rf, zero_esp));
if (sz == 2) {
/* movzwl 0(%esp), %dst */
addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
} else {
/* movl 0(%esp), %dst */
vassert(sz == 4);
addInstr(env, X86Instr_Alu32R(
Xalu_MOV, X86RMI_Mem(zero_esp), dst));
}
/* Restore default FPU rounding. */
set_FPU_rounding_default( env );
/* addl $4, %esp */
add_to_esp(env, 4);
return dst;
}
break;
}
/* --------- UNARY OP --------- */
case Iex_Unop: {
/* 1Uto8(32to1(expr32)) */
if (e->Iex.Unop.op == Iop_1Uto8) {
DECLARE_PATTERN(p_32to1_then_1Uto8);
DEFINE_PATTERN(p_32to1_then_1Uto8,
unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
IRExpr* expr32 = mi.bindee[0];
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, expr32);
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, X86Instr_Alu32R(Xalu_AND,
X86RMI_Imm(1), dst));
return dst;
}
}
/* 8Uto32(LDle(expr32)) */
if (e->Iex.Unop.op == Iop_8Uto32) {
DECLARE_PATTERN(p_LDle8_then_8Uto32);
DEFINE_PATTERN(p_LDle8_then_8Uto32,
unop(Iop_8Uto32,
IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
return dst;
}
}
/* 8Sto32(LDle(expr32)) */
if (e->Iex.Unop.op == Iop_8Sto32) {
DECLARE_PATTERN(p_LDle8_then_8Sto32);
DEFINE_PATTERN(p_LDle8_then_8Sto32,
unop(Iop_8Sto32,
IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
return dst;
}
}
/* 16Uto32(LDle(expr32)) */
if (e->Iex.Unop.op == Iop_16Uto32) {
DECLARE_PATTERN(p_LDle16_then_16Uto32);
DEFINE_PATTERN(p_LDle16_then_16Uto32,
unop(Iop_16Uto32,
IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
return dst;
}
}
/* 8Uto32(GET:I8) */
if (e->Iex.Unop.op == Iop_8Uto32) {
if (e->Iex.Unop.arg->tag == Iex_Get) {
HReg dst;
X86AMode* amode;
vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
dst = newVRegI(env);
amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
hregX86_EBP());
addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
return dst;
}
}
/* 16to32(GET:I16) */
if (e->Iex.Unop.op == Iop_16Uto32) {
if (e->Iex.Unop.arg->tag == Iex_Get) {
HReg dst;
X86AMode* amode;
vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
dst = newVRegI(env);
amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
hregX86_EBP());
addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
return dst;
}
}
switch (e->Iex.Unop.op) {
case Iop_8Uto16:
case Iop_8Uto32:
case Iop_16Uto32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, X86Instr_Alu32R(Xalu_AND,
X86RMI_Imm(mask), dst));
return dst;
}
case Iop_8Sto16:
case Iop_8Sto32:
case Iop_16Sto32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
return dst;
}
case Iop_Not8:
case Iop_Not16:
case Iop_Not32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
return dst;
}
case Iop_64HIto32: {
HReg rHi, rLo;
iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
return rHi; /* and abandon rLo .. poor wee thing :-) */
}
case Iop_64to32: {
HReg rHi, rLo;
iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
return rLo; /* similar stupid comment to the above ... */
}
case Iop_16HIto8:
case Iop_32HIto16: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
return dst;
}
case Iop_1Uto32:
case Iop_1Uto8: {
HReg dst = newVRegI(env);
X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Set32(cond,dst));
return dst;
}
case Iop_1Sto8:
case Iop_1Sto16:
case Iop_1Sto32: {
/* could do better than this, but for now ... */
HReg dst = newVRegI(env);
X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Set32(cond,dst));
addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
return dst;
}
case Iop_Ctz32: {
/* Count trailing zeroes, implemented by x86 'bsfl' */
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Bsfr32(True,src,dst));
return dst;
}
case Iop_Clz32: {
/* Count leading zeroes. Do 'bsrl' to establish the index
of the highest set bit, and subtract that value from
31. */
HReg tmp = newVRegI(env);
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Bsfr32(False,src,tmp));
addInstr(env, X86Instr_Alu32R(Xalu_MOV,
X86RMI_Imm(31), dst));
addInstr(env, X86Instr_Alu32R(Xalu_SUB,
X86RMI_Reg(tmp), dst));
return dst;
}
case Iop_CmpwNEZ32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src,dst));
addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
addInstr(env, X86Instr_Alu32R(Xalu_OR,
X86RMI_Reg(src), dst));
addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
return dst;
}
case Iop_Left8:
case Iop_Left16:
case Iop_Left32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src, dst));
addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
return dst;
}
case Iop_V128to32: {
HReg dst = newVRegI(env);
HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
sub_from_esp(env, 16);
addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
add_to_esp(env, 16);
return dst;
}
/* ReinterpF32asI32(e) */
/* Given an IEEE754 single, produce an I32 with the same bit
pattern. Keep stack 8-aligned even though only using 4
bytes. */
case Iop_ReinterpF32asI32: {
HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
HReg dst = newVRegI(env);
X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
/* paranoia */
set_FPU_rounding_default(env);
/* subl $8, %esp */
sub_from_esp(env, 8);
/* gstF %rf, 0(%esp) */
addInstr(env,
X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
/* movl 0(%esp), %dst */
addInstr(env,
X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
/* addl $8, %esp */
add_to_esp(env, 8);
return dst;
}
case Iop_16to8:
case Iop_32to8:
case Iop_32to16:
/* These are no-ops. */
return iselIntExpr_R(env, e->Iex.Unop.arg);
case Iop_GetMSBs8x8: {
/* Note: the following assumes the helper is of
signature
UInt fn ( ULong ), and is not a regparm fn.
*/
HReg xLo, xHi;
HReg dst = newVRegI(env);
HWord fn = (HWord)h_generic_calc_GetMSBs8x8;
iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
0, mk_RetLoc_simple(RLPri_Int) ));
add_to_esp(env, 2*4);
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
return dst;
}
default:
break;
}
break;
}
/* --------- GET --------- */
case Iex_Get: {
if (ty == Ity_I32) {
HReg dst = newVRegI(env);
addInstr(env, X86Instr_Alu32R(
Xalu_MOV,
X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
hregX86_EBP())),
dst));
return dst;
}
if (ty == Ity_I8 || ty == Ity_I16) {
HReg dst = newVRegI(env);
addInstr(env, X86Instr_LoadEX(
toUChar(ty==Ity_I8 ? 1 : 2),
False,
X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
dst));
return dst;
}
break;
}
case Iex_GetI: {
X86AMode* am
= genGuestArrayOffset(
env, e->Iex.GetI.descr,
e->Iex.GetI.ix, e->Iex.GetI.bias );
HReg dst = newVRegI(env);
if (ty == Ity_I8) {
addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
return dst;
}
if (ty == Ity_I32) {
addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
return dst;
}
break;
}
/* --------- CCALL --------- */
case Iex_CCall: {
HReg dst = newVRegI(env);
vassert(ty == e->Iex.CCall.retty);
/* be very restrictive for now. Only 32/64-bit ints allowed for
args, and 32 bits for return type. Don't forget to change
the RetLoc if more return types are allowed in future. */
if (e->Iex.CCall.retty != Ity_I32)
goto irreducible;
/* Marshal args, do the call, clear stack. */
UInt addToSp = 0;
RetLoc rloc = mk_RetLoc_INVALID();
doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
vassert(is_sane_RetLoc(rloc));
vassert(rloc.pri == RLPri_Int);
vassert(addToSp == 0);
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
return dst;
}
/* --------- LITERAL --------- */
/* 32/16/8-bit literals */
case Iex_Const: {
X86RMI* rmi = iselIntExpr_RMI ( env, e );
HReg r = newVRegI(env);
addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
return r;
}
/* --------- MULTIPLEX --------- */
case Iex_ITE: { // VFD
if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
&& typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
HReg dst = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(r1,dst));
X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
return dst;
}
break;
}
default:
break;
} /* switch (e->tag) */
/* We get here if no pattern matched. */
irreducible:
ppIRExpr(e);
vpanic("iselIntExpr_R: cannot reduce tree");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries ---*/
/*---------------------------------------------------------*/
/* --------------------- AMODEs --------------------- */
/* Return an AMode which computes the value of the specified
expression, possibly also adding insns to the code list as a
result. The expression may only be a 32-bit one.
*/
static Bool sane_AMode ( X86AMode* am )
{
   /* Structural check on an amode handed back by the selector.  Every
      register mentioned must be of class HRcInt32.  In the IR form
      the register may be virtual or the one returned by
      hregX86_EBP(); in the IRRS form both base and index must be
      virtual.  An unrecognised tag is fatal. */
   if (am->tag == Xam_IR) {
      if (hregClass(am->Xam.IR.reg) != HRcInt32)
         return toBool(0);
      return toBool( hregIsVirtual(am->Xam.IR.reg)
                     || sameHReg(am->Xam.IR.reg, hregX86_EBP()) );
   }

   if (am->tag == Xam_IRRS) {
      /* Check the base first, then the index. */
      if (hregClass(am->Xam.IRRS.base) != HRcInt32
          || !hregIsVirtual(am->Xam.IRRS.base))
         return toBool(0);
      return toBool( hregClass(am->Xam.IRRS.index) == HRcInt32
                     && hregIsVirtual(am->Xam.IRRS.index) );
   }

   vpanic("sane_AMode: unknown x86 amode tag");
}
static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   /* Checked entry point: let the worker choose the amode, then
      insist that it passes sane_AMode before handing it out. */
   X86AMode* am;

   am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));

   return am;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Worker: choose an x86 amode for the 32-bit expression 'e'.
      Three Add32-rooted shapes are folded directly into the amode,
      tried in the order written; the first that applies wins.
      Everything else is computed into a register and used as the
      base of an IR amode with displacement 0. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add32) {
      IRExpr* lhs = e->Iex.Binop.arg1;
      IRExpr* rhs = e->Iex.Binop.arg2;

      /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
      if (rhs->tag == Iex_Const
          && rhs->Iex.Const.con->tag == Ico_U32
          && lhs->tag == Iex_Binop
          && lhs->Iex.Binop.op == Iop_Add32
          && lhs->Iex.Binop.arg2->tag == Iex_Binop
          && lhs->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
          && lhs->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
          && lhs->Iex.Binop.arg2
                ->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
         IRExpr* shl   = lhs->Iex.Binop.arg2;
         UInt    shift = shl->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
         UInt    imm32 = rhs->Iex.Const.con->Ico.U32;
         /* Only shift amounts 1, 2 and 3 are folded; otherwise fall
            on through to the simpler patterns below. */
         if (shift >= 1 && shift <= 3) {
            HReg r1 = iselIntExpr_R(env, lhs->Iex.Binop.arg1);
            HReg r2 = iselIntExpr_R(env, shl->Iex.Binop.arg1);
            return X86AMode_IRRS(imm32, r1, r2, shift);
         }
      }

      /* Add32(expr1, Shl32(expr2, imm)) */
      if (rhs->tag == Iex_Binop
          && rhs->Iex.Binop.op == Iop_Shl32
          && rhs->Iex.Binop.arg2->tag == Iex_Const
          && rhs->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
         UInt shift = rhs->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
         if (shift >= 1 && shift <= 3) {
            HReg r1 = iselIntExpr_R(env, lhs);
            HReg r2 = iselIntExpr_R(env, rhs->Iex.Binop.arg1);
            return X86AMode_IRRS(0, r1, r2, shift);
         }
      }

      /* Add32(expr,i) */
      if (rhs->tag == Iex_Const
          && rhs->Iex.Const.con->tag == Ico_U32) {
         HReg r1 = iselIntExpr_R(env, lhs);
         return X86AMode_IR(rhs->Iex.Const.con->Ico.U32, r1);
      }
   }

   /* Nothing matched: evaluate the whole expression into a register
      and address through that. */
   return X86AMode_IR(0, iselIntExpr_R(env, e));
}
/* --------------------- RMIs --------------------- */
/* Similarly, calculate an expression into an X86RMI operand. As with
iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   /* Checked entry point for the RMI selector: run the worker, then
      validate whatever form of operand it produced. */
   X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);

   switch (rmi->tag) {
      case Xrmi_Imm:
         /* nothing to verify for an immediate */
         break;
      case Xrmi_Reg:
         vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
         break;
      case Xrmi_Mem:
         vassert(sane_AMode(rmi->Xrmi.Mem.am));
         break;
      default:
         vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   }

   return rmi;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Worker: reduce an 8-, 16- or 32-bit integer expression to a
      reg/mem/immediate operand.  Constants become immediates, and
      32-bit GETs and little-endian 32-bit loads become memory
      operands.  Anything else goes via a register. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* Immediate: narrower constants are masked to their own width. */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U32:
            return X86RMI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return X86RMI_Imm(0xFFFF & (e->Iex.Const.con->Ico.U16));
         case Ico_U8:
            return X86RMI_Imm(0xFF & (e->Iex.Const.con->Ico.U8));
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
      }
   }

   if (ty == Ity_I32) {
      /* 32-bit GET: address it as offset(%ebp). */
      if (e->tag == Iex_Get)
         return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                       hregX86_EBP()));

      /* 32-bit little-endian load: use the address as an amode. */
      if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE)
         return X86RMI_Mem(iselIntExpr_AMode(env, e->Iex.Load.addr));
   }

   /* Otherwise compute the value into a register. */
   return X86RMI_Reg(iselIntExpr_R ( env, e ));
}
/* --------------------- RIs --------------------- */
/* Calculate an expression into an X86RI operand. As with
iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   /* Checked entry point for the RI selector: run the worker, then
      validate the operand.  A register operand must be a virtual
      register of class HRcInt32. */
   X86RI* ri = iselIntExpr_RI_wrk(env, e);

   switch (ri->tag) {
      case Xri_Imm:
         /* nothing to verify for an immediate */
         break;
      case Xri_Reg:
         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(ri->Xri.Reg.reg));
         break;
      default:
         vpanic("iselIntExpr_RI: unknown x86 RI tag");
   }

   return ri;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Worker for iselIntExpr_RI: reduce an 8-, 16- or 32-bit integer
      expression to a register-or-immediate operand.  Constants become
      immediates; everything else is computed into a register. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate.  Narrower constants are masked to
      their own width before being used as a 32-bit immediate. */
   if (e->tag == Iex_Const) {
      UInt u;
      switch (e->Iex.Const.con->tag) {
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
         /* The message names this function, so that a failure here
            is not mistaken for one in the RMI worker. */
         default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
      }
      return X86RI_Imm(u);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return X86RI_Reg(r);
   }
}
/* --------------------- RMs --------------------- */
/* Similarly, calculate an expression into an X86RM operand. As with
iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   /* Checked entry point for the RM selector: run the worker, then
      validate the operand.  A register operand must be a virtual
      HRcInt32 register; a memory operand must pass sane_AMode. */
   X86RM* rm = iselIntExpr_RM_wrk(env, e);

   switch (rm->tag) {
      case Xrm_Reg:
         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
         break;
      case Xrm_Mem:
         vassert(sane_AMode(rm->Xrm.Mem.am));
         break;
      default:
         vpanic("iselIntExpr_RM: unknown x86 RM tag");
   }

   return rm;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Worker: reduce an 8-, 16- or 32-bit integer expression to a
      reg-or-memory operand.  Only a 32-bit GET is turned into a
      memory operand; loads are not special-cased here. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* General case: anything but a 32-bit GET goes via a register. */
   if (e->tag != Iex_Get || ty != Ity_I32) {
      return X86RM_Reg(iselIntExpr_R ( env, e ));
   }

   /* 32-bit GET: address it as offset(%ebp). */
   return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
                                hregX86_EBP()));
}
/* --------------------- CONDCODE --------------------- */
/* Generate code to evaluate a bit-typed expression, returning the
condition code which would correspond when the expression would
notionally have returned 1. */
static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Thin front end for the worker.  Unlike the other selector
      entry points, no check is applied to the result here. */
   X86CondCode cc = iselCondCode_wrk(env,e);
   return cc;
}
/* DO NOT CALL THIS DIRECTLY ! */
static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
/* Worker for iselCondCode. Emits instructions for the Ity_I1
expression 'e' and returns the condition code that denotes "e is 1".
The cases below are tried in the order written and the first one
that applies returns, so the more specific CmpNEZ32 / CmpNEZ64 /
CmpNE32 patterns are placed ahead of their generic counterparts.
If nothing applies, the expression is printed and we panic. */
MatchInfo mi;
vassert(e);
vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
/* var */
/* Only bit 0 of the temp's register is tested. */
if (e->tag == Iex_RdTmp) {
HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
/* Test32 doesn't modify r32; so this is OK. */
addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
return Xcc_NZ;
}
/* Constant 1:Bit */
if (e->tag == Iex_Const) {
HReg r;
vassert(e->Iex.Const.con->tag == Ico_U1);
vassert(e->Iex.Const.con->Ico.U1 == True
|| e->Iex.Const.con->Ico.U1 == False);
r = newVRegI(env);
/* r is zeroed and then XORed with itself, so the XOR result is
zero: Xcc_Z is returned for constant True and Xcc_NZ for constant
False. (Presumably the MOV is only there so that r is defined
before the XOR reads it -- TODO confirm.) */
addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
}
/* Not1(e) */
if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
/* Generate code for the arg, and negate the test condition */
/* Negation is done by flipping bit 0 of the condition code, the
same trick as the 'cc ^ 1' used for ITE in this file. This
presumably relies on the X86CondCode encoding pairing each
condition with its inverse -- confirm against host_x86_defs.h. */
return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
}
/* --- patterns rooted at: 32to1 --- */
/* 32to1(x): test bit 0 of x. */
if (e->tag == Iex_Unop
&& e->Iex.Unop.op == Iop_32to1) {
X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Test32(1,rm));
return Xcc_NZ;
}
/* --- patterns rooted at: CmpNEZ8 --- */
/* CmpNEZ8(x) */
/* Only the low 8 bits take part, via the 0xFF test mask. */
if (e->tag == Iex_Unop
&& e->Iex.Unop.op == Iop_CmpNEZ8) {
X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Test32(0xFF,rm));
return Xcc_NZ;
}
/* --- patterns rooted at: CmpNEZ16 --- */
/* CmpNEZ16(x) */
/* Only the low 16 bits take part, via the 0xFFFF test mask. */
if (e->tag == Iex_Unop
&& e->Iex.Unop.op == Iop_CmpNEZ16) {
X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
addInstr(env, X86Instr_Test32(0xFFFF,rm));
return Xcc_NZ;
}
/* --- patterns rooted at: CmpNEZ32 --- */
/* CmpNEZ32(And32(x,y)) */
{
DECLARE_PATTERN(p_CmpNEZ32_And32);
DEFINE_PATTERN(p_CmpNEZ32_And32,
unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
/* tmp is a scratch copy of x; its value is not read again after
the AND, so only the flags left by the AND are of interest. */
HReg tmp = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(r0, tmp));
addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
return Xcc_NZ;
}
}
/* CmpNEZ32(Or32(x,y)) */
{
DECLARE_PATTERN(p_CmpNEZ32_Or32);
DEFINE_PATTERN(p_CmpNEZ32_Or32,
unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
/* Same scheme as the And32 case above, using OR instead. */
HReg tmp = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(r0, tmp));
addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
return Xcc_NZ;
}
}
/* CmpNEZ32(GET(..):I32) */
/* Compare the value at offset(%ebp) against zero in place, without
first loading it into a register. */
if (e->tag == Iex_Unop
&& e->Iex.Unop.op == Iop_CmpNEZ32
&& e->Iex.Unop.arg->tag == Iex_Get) {
X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
hregX86_EBP());
addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
return Xcc_NZ;
}
/* CmpNEZ32(x) */
/* Generic form: compute x into a register and compare it with 0. */
if (e->tag == Iex_Unop
&& e->Iex.Unop.op == Iop_CmpNEZ32) {
HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
X86RMI* rmi2 = X86RMI_Imm(0);
addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
return Xcc_NZ;
}
/* --- patterns rooted at: CmpNEZ64 --- */
/* CmpNEZ64(Or64(x,y)) */
{
DECLARE_PATTERN(p_CmpNEZ64_Or64);
DEFINE_PATTERN(p_CmpNEZ64_Or64,
unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
HReg hi1, lo1, hi2, lo2;
HReg tmp = newVRegI(env);
/* OR all four 32-bit halves of x and y into tmp. The second
operand is only selected after the first has been folded in. */
iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
addInstr(env, mk_iMOVsd_RR(hi1, tmp));
addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
return Xcc_NZ;
}
}
/* CmpNEZ64(x) */
/* Generic form: OR the two halves of x together in a scratch reg. */
if (e->tag == Iex_Unop
&& e->Iex.Unop.op == Iop_CmpNEZ64) {
HReg hi, lo;
HReg tmp = newVRegI(env);
iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
addInstr(env, mk_iMOVsd_RR(hi, tmp));
addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
return Xcc_NZ;
}
/* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
/* CmpEQ8 / CmpNE8 */
/* The CasCmp variants are handled identically to the plain ones. */
if (e->tag == Iex_Binop
&& (e->Iex.Binop.op == Iop_CmpEQ8
|| e->Iex.Binop.op == Iop_CmpNE8
|| e->Iex.Binop.op == Iop_CasCmpEQ8
|| e->Iex.Binop.op == Iop_CasCmpNE8)) {
if (isZeroU8(e->Iex.Binop.arg2)) {
/* Comparison against zero: no XOR needed, just test the low
8 bits of arg1. */
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
switch (e->Iex.Binop.op) {
case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
}
} else {
/* General case: XOR the operands into a scratch register and
test only the low 8 bits of the result, so whatever is in the
upper 24 bits is ignored. */
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
HReg r = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(r1,r));
addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
switch (e->Iex.Binop.op) {
case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
}
}
}
/* CmpEQ16 / CmpNE16 */
/* Same XOR-and-test scheme as the 8-bit general case, with a 0xFFFF
mask. There is no separate compare-with-zero shortcut here. */
if (e->tag == Iex_Binop
&& (e->Iex.Binop.op == Iop_CmpEQ16
|| e->Iex.Binop.op == Iop_CmpNE16
|| e->Iex.Binop.op == Iop_CasCmpEQ16
|| e->Iex.Binop.op == Iop_CasCmpNE16
|| e->Iex.Binop.op == Iop_ExpCmpNE16)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
HReg r = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(r1,r));
addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
switch (e->Iex.Binop.op) {
case Iop_CmpEQ16: case Iop_CasCmpEQ16:
return Xcc_Z;
case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
return Xcc_NZ;
default:
vpanic("iselCondCode(x86): CmpXX16");
}
}
/* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
Saves a "movl %eax, %tmp" compared to the default route. */
if (e->tag == Iex_Binop
&& e->Iex.Binop.op == Iop_CmpNE32
&& e->Iex.Binop.arg1->tag == Iex_CCall
&& e->Iex.Binop.arg2->tag == Iex_Const) {
IRExpr* cal = e->Iex.Binop.arg1;
IRExpr* con = e->Iex.Binop.arg2;
/* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
vassert(con->Iex.Const.con->tag == Ico_U32);
/* Marshal args, do the call. */
UInt addToSp = 0;
RetLoc rloc = mk_RetLoc_INVALID();
doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
cal->Iex.CCall.cee,
cal->Iex.CCall.retty, cal->Iex.CCall.args );
vassert(is_sane_RetLoc(rloc));
vassert(rloc.pri == RLPri_Int);
vassert(addToSp == 0);
/* */
/* Compare the call's result directly in %eax against the
constant, rather than copying it to a vreg first. */
addInstr(env, X86Instr_Alu32R(Xalu_CMP,
X86RMI_Imm(con->Iex.Const.con->Ico.U32),
hregX86_EAX()));
return Xcc_NZ;
}
/* Cmp*32*(x,y) */
/* One CMP of arg2 against arg1, then map the IR op onto the
corresponding condition code. */
if (e->tag == Iex_Binop
&& (e->Iex.Binop.op == Iop_CmpEQ32
|| e->Iex.Binop.op == Iop_CmpNE32
|| e->Iex.Binop.op == Iop_CmpLT32S
|| e->Iex.Binop.op == Iop_CmpLT32U
|| e->Iex.Binop.op == Iop_CmpLE32S
|| e->Iex.Binop.op == Iop_CmpLE32U
|| e->Iex.Binop.op == Iop_CasCmpEQ32
|| e->Iex.Binop.op == Iop_CasCmpNE32
|| e->Iex.Binop.op == Iop_ExpCmpNE32)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
switch (e->Iex.Binop.op) {
case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
case Iop_CmpNE32:
case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
case Iop_CmpLT32S: return Xcc_L;
case Iop_CmpLT32U: return Xcc_B;
case Iop_CmpLE32S: return Xcc_LE;
case Iop_CmpLE32U: return Xcc_BE;
default: vpanic("iselCondCode(x86): CmpXX32");
}
}
/* CmpNE64 */
/* Also handles CmpEQ64. XOR the high halves and the low halves
separately, then OR the two differences together: the final result
is zero exactly when both halves are equal. */
if (e->tag == Iex_Binop
&& (e->Iex.Binop.op == Iop_CmpNE64
|| e->Iex.Binop.op == Iop_CmpEQ64)) {
HReg hi1, hi2, lo1, lo2;
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
addInstr(env, mk_iMOVsd_RR(hi1, tHi));
addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
addInstr(env, mk_iMOVsd_RR(lo1, tLo));
addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
switch (e->Iex.Binop.op) {
case Iop_CmpNE64: return Xcc_NZ;
case Iop_CmpEQ64: return Xcc_Z;
default: vpanic("iselCondCode(x86): CmpXX64");
}
}
/* No pattern matched: show the offending expression and give up. */
ppIRExpr(e);
vpanic("iselCondCode");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64 bit) ---*/
/*---------------------------------------------------------*/
/* Compute a 64-bit value into a register pair, which is returned as
the first two parameters. Both registers are asserted below to be
virtual 32-bit integer registers; in any case they must not be changed
by subsequent code emitted by the caller. */
static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
/* Checked entry point for 64-bit selection: the worker does the
actual selection and writes the high and low halves to *rHi and
*rLo. */
iselInt64Expr_wrk(rHi, rLo, env, e);
/* Disabled debugging aid: prints the expression just selected. */
# if 0
vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
/* Post-conditions: each half must be a virtual register of class
HRcInt32. */
vassert(hregClass(*rHi) == HRcInt32);
vassert(hregIsVirtual(*rHi));
vassert(hregClass(*rLo) == HRcInt32);
vassert(hregIsVirtual(*rLo));
}
/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
MatchInfo mi;
HWord fn = 0; /* helper fn for most SIMD64 stuff */
vassert(e);
vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
/* 64-bit literal */
if (e->tag == Iex_Const) {
ULong w64 = e->Iex.Const.con->Ico.U64;
UInt wHi = toUInt(w64 >> 32);
UInt wLo = toUInt(w64);
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
vassert(e->Iex.Const.con->tag == Ico_U64);
if (wLo == wHi) {
/* Save a precious Int register in this special case. */
addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
*rHi = tLo;
*rLo = tLo;
} else {
addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
*rHi = tHi;
*rLo = tLo;
}
return;
}
/* read 64-bit IRTemp */
if (e->tag == Iex_RdTmp) {
lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
return;
}
/* 64-bit load */
if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
HReg tLo, tHi;
X86AMode *am0, *am4;
vassert(e->Iex.Load.ty == Ity_I64);
tLo = newVRegI(env);
tHi = newVRegI(env);
am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
am4 = advance4(am0);
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
*rHi = tHi;
*rLo = tLo;
return;
}
/* 64-bit GET */
if (e->tag == Iex_Get) {
X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
X86AMode* am4 = advance4(am);
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
*rHi = tHi;
*rLo = tLo;
return;
}
/* 64-bit GETI */
if (e->tag == Iex_GetI) {
X86AMode* am
= genGuestArrayOffset( env, e->Iex.GetI.descr,
e->Iex.GetI.ix, e->Iex.GetI.bias );
X86AMode* am4 = advance4(am);
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
*rHi = tHi;
*rLo = tLo;
return;
}
/* 64-bit ITE: ITE(g, expr, expr) */ // VFD
if (e->tag == Iex_ITE) {
HReg e0Lo, e0Hi, e1Lo, e1Hi;
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
/* This assumes the first cmov32 doesn't trash the condition
codes, so they are still available for the second cmov32 */
addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
/* --------- BINARY ops --------- */
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
/* 32 x 32 -> 64 multiply */
case Iop_MullU32:
case Iop_MullS32: {
/* get one operand into %eax, and the other into a R/M.
Need to make an educated guess about which is better in
which. */
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
addInstr(env, X86Instr_MulL(syned, rmLeft));
/* Result is now in EDX:EAX. Tell the caller. */
addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
/* 64 x 32 -> (32(rem),32(div)) division */
case Iop_DivModU64to32:
case Iop_DivModS64to32: {
/* Get the 64-bit operand into edx:eax, and the other into
any old R/M. */
HReg sHi, sLo;
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
addInstr(env, X86Instr_Div(syned, rmRight));
addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
/* Or64/And64/Xor64 */
case Iop_Or64:
case Iop_And64:
case Iop_Xor64: {
HReg xLo, xHi, yLo, yHi;
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
: e->Iex.Binop.op==Iop_And64 ? Xalu_AND
: Xalu_XOR;
iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(xHi, tHi));
addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
addInstr(env, mk_iMOVsd_RR(xLo, tLo));
addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
/* Add64/Sub64 */
/* Both are built from a pair of 32-bit operations: ADD (or SUB) on
   the low halves followed by ADC (or SBB) on the high halves.  The
   low-half op is always added first; ADC/SBB are expected to pick up
   the carry/borrow it leaves behind. */
case Iop_Add64:
if (e->Iex.Binop.arg2->tag == Iex_Const) {
/* special case Add64(e, const) */
/* The constant is split into two 32-bit immediates, so arg2 is
   never selected into registers on this path. */
ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
UInt wHi = toUInt(w64 >> 32);
UInt wLo = toUInt(w64);
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
HReg xLo, xHi;
/* NOTE(review): w64 above is read from Ico.U64 before this tag
   check is reached. */
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
/* Work on copies of arg1's halves in fresh vregs. */
addInstr(env, mk_iMOVsd_RR(xHi, tHi));
addInstr(env, mk_iMOVsd_RR(xLo, tLo));
addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
*rHi = tHi;
*rLo = tLo;
return;
}
/* else fall through to the generic case */
/* Generic register-register form.  Add64 with a non-constant arg2
   arrives here by falling through from the case above, which is why
   the op is re-tested below to choose between ADD/ADC and SUB/SBB. */
case Iop_Sub64: {
HReg xLo, xHi, yLo, yHi;
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
/* arg1 is selected and copied into the fresh vregs before arg2 is
   selected. */
iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(xHi, tHi));
addInstr(env, mk_iMOVsd_RR(xLo, tLo));
iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
if (e->Iex.Binop.op==Iop_Add64) {
addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
} else {
addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
}
*rHi = tHi;
*rLo = tLo;
return;
}
/* 32HLto64(e1,e2) */
/* e1 (arg1) supplies the high half and e2 (arg2) the low half.  The
   registers returned by iselIntExpr_R are handed back as they are;
   this arm itself adds no copy into fresh temporaries. */
case Iop_32HLto64:
*rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
*rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
return;
/* 64-bit shifts */
/* Left shift of a 64-bit value (arg1) by the amount in arg2. */
case Iop_Shl64: {
/* We use the same ingenious scheme as gcc. Put the value
to be shifted into %hi:%lo, and the shift amount into
%cl. Then (dsts on right, a la ATT syntax):
shldl %cl, %lo, %hi -- make %hi be right for the
-- shift amt %cl % 32
shll %cl, %lo -- make %lo be right for the
-- shift amt %cl % 32
Now, if (shift amount % 64) is in the range 32 .. 63,
we have to do a fixup, which puts the result low half
into the result high half, and zeroes the low half:
testl $32, %ecx
cmovnz %lo, %hi
movl $0, %tmp -- sigh; need yet another reg
cmovnz %tmp, %lo
*/
HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
tLo = newVRegI(env);
tHi = newVRegI(env);
/* tTemp receives the constant zero used by the last cmov of the fixup. */
tTemp = newVRegI(env);
/* The shift amount is selected before the 64-bit value. */
rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
addInstr(env, mk_iMOVsd_RR(sHi, tHi));
addInstr(env, mk_iMOVsd_RR(sLo, tLo));
/* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
and those regs are legitimately modifiable. */
/* The two shifts described above: first the double-register shift
   into tHi, then the plain shift of tLo. */
addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
/* Fixup, taken when bit 5 (value 32) of the shift amount is set:
   tLo moves into tHi, then zero moves into tLo. */
addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
*rHi = tHi;
*rLo = tLo;
return;
}
/* Right shift of a 64-bit value (arg1) by the amount in arg2; the
   mirror image of the Shl64 sequence, with the roles of the high and
   low halves exchanged.  The fixup fills the high half with zero. */
case Iop_Shr64: {
/* We use the same ingenious scheme as gcc. Put the value
to be shifted into %hi:%lo, and the shift amount into
%cl. Then:
shrdl %cl, %hi, %lo -- make %lo be right for the
-- shift amt %cl % 32
shrl %cl, %hi -- make %hi be right for the
-- shift amt %cl % 32
Now, if (shift amount % 64) is in the range 32 .. 63,
we have to do a fixup, which puts the result high half
into the result low half, and zeroes the high half:
testl $32, %ecx
cmovnz %hi, %lo
movl $0, %tmp -- sigh; need yet another reg
cmovnz %tmp, %hi
*/
HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
tLo = newVRegI(env);
tHi = newVRegI(env);
/* tTemp receives the constant zero used by the last cmov of the fixup. */
tTemp = newVRegI(env);
/* The shift amount is selected before the 64-bit value. */
rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
addInstr(env, mk_iMOVsd_RR(sHi, tHi));
addInstr(env, mk_iMOVsd_RR(sLo, tLo));
/* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
and those regs are legitimately modifiable. */
/* The two shifts described above: first the double-register shift
   into tLo, then the plain shift of tHi. */
addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
/* Fixup, taken when bit 5 (value 32) of the shift amount is set:
   tHi moves into tLo, then zero moves into tHi. */
addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
*rHi = tHi;
*rLo = tLo;
return;
}
/* F64 -> I64 */
/* Sigh, this is an almost exact copy of the F64 -> I32/I16
case. Unfortunately I see no easy way to avoid the
duplication. */
/* arg1 is the rounding mode expression and arg2 the F64 value.  The
   conversion goes through an 8-byte scratch area at the top of the
   stack: the value is stored there as a 64-bit integer and then read
   back as two 32-bit words. */
case Iop_F64toI64S: {
HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
/* Used several times ... */
/* Careful ... this sharing is only safe because
zero_esp/four_esp do not hold any registers which the
register allocator could attempt to swizzle later. */
X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
/* rf now holds the value to be converted.  The rounding mode,
encoded as per the IRRoundingMode enum, is not held in a
register of its own in this case: the expression
e->Iex.Binop.arg1 is passed directly to set_FPU_rounding_mode
below. The first thing to do is set the FPU's rounding mode
accordingly. */
/* Create a space for the format conversion. */
/* subl $8, %esp */
sub_from_esp(env, 8);
/* Set host rounding mode */
set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
/* gistll %rf, 0(%esp) */
addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
/* movl 0(%esp), %dstLo */
/* movl 4(%esp), %dstHi */
/* (%dstLo and %dstHi in the two lines above are tLo and tHi.) */
addInstr(env, X86Instr_Alu32R(
Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
addInstr(env, X86Instr_Alu32R(
Xalu_MOV, X86RMI_Mem(four_esp), tHi));
/* Restore default FPU rounding. */
set_FPU_rounding_default( env );
/* addl $8, %esp */
/* Balances the sub_from_esp(env, 8) at the top of this case. */
add_to_esp(env, 8);
*rHi = tHi;
*rLo = tLo;
return;
}
case Iop_Add8x8:
fn = (HWord)h_generic_calc_Add8x8; goto binnish;
case Iop_Add16x4:
fn = (HWord)h_generic_calc_Add16x4; goto binnish;
case Iop_Add32x2:
fn = (HWord)h_generic_calc_Add32x2; goto binnish;
case Iop_Avg8Ux8:
fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
case Iop_Avg16Ux4:
fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
case Iop_CmpEQ8x8:
fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
case Iop_CmpEQ16x4:
fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
case Iop_CmpEQ32x2:
fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
case Iop_CmpGT8Sx8:
fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
case Iop_CmpGT16Sx4:
fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
case Iop_CmpGT32Sx2:
fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
case Iop_InterleaveHI8x8:
fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
case Iop_InterleaveLO8x8:
fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
case Iop_InterleaveHI16x4:
fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
case Iop_InterleaveLO16x4:
fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
case Iop_InterleaveHI32x2:
fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
case Iop_InterleaveLO32x2:
fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
case Iop_CatOddLanes16x4:
fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
case Iop_CatEvenLanes16x4:
fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
case Iop_Perm8x8:
fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
case Iop_Max8Ux8:
fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
case Iop_Max16Sx4:
fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
case Iop_Min8Ux8:
fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
case Iop_Min16Sx4:
fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
case Iop_Mul16x4:
fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
case Iop_Mul32x2:
fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
case Iop_MulHi16Sx4:
fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
case Iop_MulHi16Ux4:
fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
case Iop_QAdd8Sx8:
fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
case Iop_QAdd16Sx4:
fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
case Iop_QAdd8Ux8:
fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
case Iop_QAdd16Ux4:
fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
case Iop_QNarrowBin32Sto16Sx4:
fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
case Iop_QNarrowBin16Sto8Sx8:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
case Iop_QNarrowBin16Sto8Ux8:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
case Iop_NarrowBin16to8x8:
fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
case Iop_NarrowBin32to16x4:
fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
case Iop_QSub8Sx8:
fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
case Iop_QSub16Sx4:
fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
case Iop_QSub8Ux8:
fn = (HWord)h_generic_