blob: dd894e834044bc3537e261cb64187fc9ca7b6b34 [file] [log] [blame]
/*---------------------------------------------------------------*/
/*--- begin host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2004-2013 OpenWorks LLP
info@open-works.net
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"
/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff ---*/
/*---------------------------------------------------------*/
/* Vex-generated code expects to run with the FPU set as follows: all
exceptions masked, round-to-nearest, precision = 53 bits. This
corresponds to a FPU control word value of 0x027F.
Similarly the SSE control word (%mxcsr) should be 0x1F80.
%fpucw and %mxcsr should have these values on entry to
Vex-generated code, and those values should be
unchanged at exit.
*/
#define DEFAULT_FPUCW 0x027F
#define DEFAULT_MXCSR 0x1F80
/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
/*---------------------------------------------------------*/
/*--- misc helpers ---*/
/*---------------------------------------------------------*/
/* These are duplicated in guest-amd64/toIR.c */

/* Build an IR unary-op expression node: op(a). */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
/* Build an IR binary-op expression node: op(a1, a2). */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
/* Build an IR binder node; used as a placeholder in pattern
   templates for the ir_match.h matching machinery. */
static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
/* Is 'e' the constant 0 :: Ity_I8 ? */
static Bool isZeroU8 ( IRExpr* e )
{
   if (e->tag != Iex_Const)
      return False;
   if (e->Iex.Const.con->tag != Ico_U8)
      return False;
   return toBool(e->Iex.Const.con->Ico.U8 == 0);
}
/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/
/* This carries around:
- A mapping from IRTemp to IRType, giving the type of any IRTemp we
might encounter. This is computed before insn selection starts,
and does not change.
- A mapping from IRTemp to HReg. This tells the insn selector
which virtual register is associated with each IRTemp
temporary. This is computed before insn selection starts, and
does not change. We expect this mapping to map precisely the
same set of IRTemps as the type mapping does.
- vregmap holds the primary register for the IRTemp.
- vregmapHI is only used for 128-bit integer-typed
IRTemps. It holds the identity of a second
64-bit virtual HReg, which holds the high half
of the value.
- The host subarchitecture we are selecting insns for.
This is set at the start and does not change.
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
- A Bool for indicating whether we may generate chain-me
instructions for control flow transfers, or whether we must use
XAssisted.
- The maximum guest address of any guest insn in this block.
Actually, the address of the highest-addressed byte from any insn
in this block. Is set at the start and does not change. This is
used for detecting jumps which are definitely forward-edges from
this block, and therefore can be made (chained) to the fast entry
point of the destination, thereby avoiding the destination's
event check.
Note, this is all host-independent. (JRS 20050201: well, kinda
... not completely. Compare with ISelEnv for X86.)
*/
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;        /* type of each IRTemp */
      HReg*        vregmap;         /* primary vreg for each IRTemp */
      HReg*        vregmapHI;       /* second (high-half) vreg; valid
                                       only for 128-bit integer temps */
      Int          n_vregmap;       /* entry count of the maps above */

      UInt         hwcaps;          /* host capability flags */

      Bool         chainingAllowed; /* may we emit chain-me transfers,
                                       or must we use XAssisted? */
      Addr64       max_ga;          /* highest guest addr in this block */

      /* These are modified as we go along. */
      HInstrArray* code;            /* the insns selected so far */
      Int          vreg_ctr;        /* counter for new virtual regs */
   }
   ISelEnv;
/* Return the (primary) vreg associated with IRTemp 'tmp'. */
static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   /* NOTE(review): if IRTemp is an unsigned type, the >= 0 check is
      vacuous -- confirm against libvex_ir.h. */
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}
/* Return the vreg pair for a 128-bit integer IRTemp: low half in
   *vrLO (from vregmap), high half in *vrHI (from vregmapHI).  The
   temp must actually have a high-half register assigned. */
static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}
/* Append 'instr' to the code being generated, printing it first if
   vcode tracing is enabled. */
static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}
/* Allocate a fresh 64-bit integer virtual register. */
static HReg newVRegI ( ISelEnv* env )
{
   return mkHReg(env->vreg_ctr++, HRcInt64, True/*virtual reg*/);
}
/* Allocate a fresh 128-bit vector virtual register. */
static HReg newVRegV ( ISelEnv* env )
{
   return mkHReg(env->vreg_ctr++, HRcVec128, True/*virtual reg*/);
}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations ---*/
/*---------------------------------------------------------*/
/* These are organised as iselXXX and iselXXX_wrk pairs. The
iselXXX_wrk do the real work, but are not to be called directly.
For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
checks that all returned registers are virtual. You should not
call the _wrk version directly.
*/
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
static void iselDVecExpr ( /*OUT*/HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers ---*/
/*---------------------------------------------------------*/
/* Sanity-check an amode: every register in it must be of 64-bit
   integer class, and must be virtual -- except that an Aam_IR base
   may also be the real %rbp (the guest-state pointer). */
static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}
/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   /* Bits 63..31 of x: for any value that is the sign extension of
      its low 32 bits, these 33 bits are either all zero or all one.
      This formulation replaces the previous signed shift-up/shift-
      down sequence, whose left shift of a signed value is undefined
      behaviour in C when bits cross the sign bit (C11 6.5.7). */
   ULong top33 = x >> 31;
   return toBool(top33 == 0ULL || top33 == 0x1FFFFFFFFULL);
}
/* Is 'e' the constant 0 :: Ity_I64 ? */
static Bool isZeroU64 ( IRExpr* e )
{
   if (e->tag != Iex_Const)
      return False;
   IRConst* con = e->Iex.Const.con;
   return toBool(con->tag == Ico_U64 && con->Ico.U64 == 0ULL);
}
/* Is 'e' the constant 0 :: Ity_I32 ? */
static Bool isZeroU32 ( IRExpr* e )
{
   if (e->tag != Iex_Const)
      return False;
   IRConst* con = e->Iex.Const.con;
   return toBool(con->tag == Ico_U32 && con->Ico.U32 == 0);
}
/* Make an int reg-reg move: movq src, dst.  Both registers must be
   of 64-bit integer class. */
static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}
/* Make a vector (128 bit) reg-reg move.  Both registers must be of
   128-bit vector class. */
static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}
/* Advance %rsp by n (pop direction).  n must be a small positive
   multiple of 8, keeping the stack 8-aligned. */
static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                                        hregAMD64_RSP()));
}
/* Retreat %rsp by n (push direction).  n must be a small positive
   multiple of 8, keeping the stack 8-aligned. */
static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                                        hregAMD64_RSP()));
}
/* Push a 64-bit constant on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its lower
      32 bits, a single 'pushq $imm32' suffices.  Use fitsIn32Bits
      for the test, for consistency with the rest of this file and
      because the previous inline test ((simm64 << 32) >> 32)
      left-shifted a signed value, which is undefined behaviour in C
      when bits cross the sign bit (C11 6.5.7). */
   if (fitsIn32Bits(uimm64)) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      /* Otherwise materialise the full 64-bit constant in a fresh
         vreg and push that. */
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */
static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_BBPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         /* Representable as a sign-extended 32-bit immediate. */
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         /* Needs the full 64-bit immediate-load form. */
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      /* A temporary: just a reg-reg copy. */
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      /* A 64-bit guest-state read: one load via %rbp. */
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      /* 32Uto64(tmp): a single zero-extending movl does it. */
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   /* Not computable in one instruction; caller falls back to the
      slow scheme. */
   return NULL;
}
/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */
static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   /* The AMD64 SysV integer argument registers, in order. */
   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_BBPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   /* Compute each arg into a fresh vreg first; they are moved to the
      real argument registers only after the guard (if any) has been
      evaluated. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         /* The BaseBlock pointer: pass a copy of %rbp. */
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nBBPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
         case Ity_INVALID:
            /* Function doesn't return a value. */
            *retloc = mk_RetLoc_simple(RLPri_None);
            break;
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            /* Scalar result comes back in %rax. */
            *retloc = mk_RetLoc_simple(RLPri_Int);
            break;
         case Ity_V128:
            /* Vector result lands in the stack slot allocated above;
               the caller must pop it (16 bytes). */
            *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
            *stackAdjustAfterCall = 16;
            break;
         case Ity_V256:
            /* As for V128, but a 32-byte slot. */
            *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
            *stackAdjustAfterCall = 32;
            break;
         default:
            /* IR can denote other possible return types, but we don't
               handle those here. */
           vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, Ptr_to_ULong(cee->addr), n_args, *retloc));
}
/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */
static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */
   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp         (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   /* Mask the index into 0 .. 7, since nElems == 8. */
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                                    elemSz==8 ? 3 : 0);
}
/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80). */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg       -- rounding field is MXCSR bits 14:13
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2  -- rounding field is FPUCW bits 11:10
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
/* Generate all-zeroes into a new vector register, via xor dst,dst.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}
/* Generate all-ones into a new vector register, by comparing it for
   equality against itself (each lane trivially equals itself).
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}
/* Generate !src into a new vector register, as all-ones XOR src.
   Amazing that there isn't a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}
/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times: bit i of w8 becomes byte i (0x00 or 0xFF) of the
   result. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64  = 0;
   ULong lane = 0xFFULL;   /* mask for the byte currently considered */
   Int   bit;
   for (bit = 0; bit < 8; bit++, lane <<= 8) {
      if ((w8 >> bit) & 1)
         w64 |= lane;
   }
   return w64;
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
/*---------------------------------------------------------*/
/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/
static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   /* Delegate to the worker, then sanity-check the result: it must
      be a virtual 64-bit integer register. */
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}
/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
/* Used for unary/binary SIMD64 ops. */
HWord fn = 0;
Bool second_is_UInt;
MatchInfo mi;
DECLARE_PATTERN(p_1Uto8_64to1);
DECLARE_PATTERN(p_LDle8_then_8Uto64);
DECLARE_PATTERN(p_LDle16_then_16Uto64);
IRType ty = typeOfIRExpr(env->type_env,e);
switch (ty) {
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
default: vassert(0);
}
switch (e->tag) {
/* --------- TEMP --------- */
case Iex_RdTmp: {
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
}
/* --------- LOAD --------- */
case Iex_Load: {
HReg dst = newVRegI(env);
AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
/* We can't handle big-endian loads, nor load-linked. */
if (e->Iex.Load.end != Iend_LE)
goto irreducible;
if (ty == Ity_I64) {
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
AMD64RMI_Mem(amode), dst) );
return dst;
}
if (ty == Ity_I32) {
addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
return dst;
}
if (ty == Ity_I16) {
addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
return dst;
}
if (ty == Ity_I8) {
addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
return dst;
}
break;
}
/* --------- BINARY OP --------- */
case Iex_Binop: {
AMD64AluOp aluOp;
AMD64ShiftOp shOp;
/* Pattern: Sub64(0,x) */
/* and: Sub32(0,x) */
if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
|| (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
HReg dst = newVRegI(env);
HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(reg,dst));
addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
return dst;
}
/* Is it an addition or logical style op? */
switch (e->Iex.Binop.op) {
case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
aluOp = Aalu_ADD; break;
case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
aluOp = Aalu_SUB; break;
case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
aluOp = Aalu_AND; break;
case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
aluOp = Aalu_OR; break;
case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
aluOp = Aalu_XOR; break;
case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
aluOp = Aalu_MUL; break;
default:
aluOp = Aalu_INVALID; break;
}
/* For commutative ops we assume any literal
values are on the second operand. */
if (aluOp != Aalu_INVALID) {
HReg dst = newVRegI(env);
HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(reg,dst));
addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
return dst;
}
/* Perhaps a shift op? */
switch (e->Iex.Binop.op) {
case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
shOp = Ash_SHL; break;
case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
shOp = Ash_SHR; break;
case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
shOp = Ash_SAR; break;
default:
shOp = Ash_INVALID; break;
}
if (shOp != Ash_INVALID) {
HReg dst = newVRegI(env);
/* regL = the value to be shifted */
HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(regL,dst));
/* Do any necessary widening for 32/16/8 bit operands */
switch (e->Iex.Binop.op) {
case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
break;
case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
break;
case Iop_Shr8:
addInstr(env, AMD64Instr_Alu64R(
Aalu_AND, AMD64RMI_Imm(0xFF), dst));
break;
case Iop_Shr16:
addInstr(env, AMD64Instr_Alu64R(
Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
break;
case Iop_Shr32:
addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
break;
case Iop_Sar8:
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
break;
case Iop_Sar16:
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
break;
case Iop_Sar32:
addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
break;
default:
ppIROp(e->Iex.Binop.op);
vassert(0);
}
/* Now consider the shift amount. If it's a literal, we
can do a much better job than the general case. */
if (e->Iex.Binop.arg2->tag == Iex_Const) {
/* assert that the IR is well-typed */
Int nshift;
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
vassert(nshift >= 0);
if (nshift > 0)
/* Can't allow nshift==0 since that means %cl */
addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
} else {
/* General case; we have to force the amount into %cl. */
HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
}
return dst;
}
/* Deal with 64-bit SIMD binary ops */
second_is_UInt = False;
switch (e->Iex.Binop.op) {
case Iop_Add8x8:
fn = (HWord)h_generic_calc_Add8x8; break;
case Iop_Add16x4:
fn = (HWord)h_generic_calc_Add16x4; break;
case Iop_Add32x2:
fn = (HWord)h_generic_calc_Add32x2; break;
case Iop_Avg8Ux8:
fn = (HWord)h_generic_calc_Avg8Ux8; break;
case Iop_Avg16Ux4:
fn = (HWord)h_generic_calc_Avg16Ux4; break;
case Iop_CmpEQ8x8:
fn = (HWord)h_generic_calc_CmpEQ8x8; break;
case Iop_CmpEQ16x4:
fn = (HWord)h_generic_calc_CmpEQ16x4; break;
case Iop_CmpEQ32x2:
fn = (HWord)h_generic_calc_CmpEQ32x2; break;
case Iop_CmpGT8Sx8:
fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
case Iop_CmpGT16Sx4:
fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
case Iop_CmpGT32Sx2:
fn = (HWord)h_generic_calc_CmpGT32Sx2; break;
case Iop_InterleaveHI8x8:
fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
case Iop_InterleaveLO8x8:
fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
case Iop_InterleaveHI16x4:
fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
case Iop_InterleaveLO16x4:
fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
case Iop_InterleaveHI32x2:
fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
case Iop_InterleaveLO32x2:
fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
case Iop_CatOddLanes16x4:
fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
case Iop_CatEvenLanes16x4:
fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
case Iop_Perm8x8:
fn = (HWord)h_generic_calc_Perm8x8; break;
case Iop_Max8Ux8:
fn = (HWord)h_generic_calc_Max8Ux8; break;
case Iop_Max16Sx4:
fn = (HWord)h_generic_calc_Max16Sx4; break;
case Iop_Min8Ux8:
fn = (HWord)h_generic_calc_Min8Ux8; break;
case Iop_Min16Sx4:
fn = (HWord)h_generic_calc_Min16Sx4; break;
case Iop_Mul16x4:
fn = (HWord)h_generic_calc_Mul16x4; break;
case Iop_Mul32x2:
fn = (HWord)h_generic_calc_Mul32x2; break;
case Iop_MulHi16Sx4:
fn = (HWord)h_generic_calc_MulHi16Sx4; break;
case Iop_MulHi16Ux4:
fn = (HWord)h_generic_calc_MulHi16Ux4; break;
case Iop_QAdd8Sx8:
fn = (HWord)h_generic_calc_QAdd8Sx8; break;
case Iop_QAdd16Sx4:
fn = (HWord)h_generic_calc_QAdd16Sx4; break;
case Iop_QAdd8Ux8:
fn = (HWord)h_generic_calc_QAdd8Ux8; break;
case Iop_QAdd16Ux4:
fn = (HWord)h_generic_calc_QAdd16Ux4; break;
case Iop_QNarrowBin32Sto16Sx4:
fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
case Iop_QNarrowBin16Sto8Sx8:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
case Iop_QNarrowBin16Sto8Ux8:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
case Iop_NarrowBin16to8x8:
fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
case Iop_NarrowBin32to16x4:
fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;
case Iop_QSub8Sx8:
fn = (HWord)h_generic_calc_QSub8Sx8; break;
case Iop_QSub16Sx4:
fn = (HWord)h_generic_calc_QSub16Sx4; break;
case Iop_QSub8Ux8:
fn = (HWord)h_generic_calc_QSub8Ux8; break;
case Iop_QSub16Ux4:
fn = (HWord)h_generic_calc_QSub16Ux4; break;
case Iop_Sub8x8:
fn = (HWord)h_generic_calc_Sub8x8; break;
case Iop_Sub16x4:
fn = (HWord)h_generic_calc_Sub16x4; break;
case Iop_Sub32x2:
fn = (HWord)h_generic_calc_Sub32x2; break;
case Iop_ShlN32x2:
fn = (HWord)h_generic_calc_ShlN32x2;
second_is_UInt = True;
break;
case Iop_ShlN16x4:
fn = (HWord)h_generic_calc_ShlN16x4;
second_is_UInt = True;
break;
case Iop_ShlN8x8:
fn = (HWord)h_generic_calc_ShlN8x8;
second_is_UInt = True;
break;
case Iop_ShrN32x2:
fn = (HWord)h_generic_calc_ShrN32x2;
second_is_UInt = True;
break;
case Iop_ShrN16x4:
fn = (HWord)h_generic_calc_ShrN16x4;
second_is_UInt = True;
break;
case Iop_SarN32x2:
fn = (HWord)h_generic_calc_SarN32x2;
second_is_UInt = True;
break;
case Iop_SarN16x4:
fn = (HWord)h_generic_calc_SarN16x4;
second_is_UInt = True;
break;
case Iop_SarN8x8:
fn = (HWord)h_generic_calc_SarN8x8;
second_is_UInt = True;
break;
default:
fn = (HWord)0; break;
}
if (fn != (HWord)0) {
/* Note: the following assumes all helpers are of signature
ULong fn ( ULong, ULong ), and they are
not marked as regparm functions.
*/
HReg dst = newVRegI(env);
HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
if (second_is_UInt)
addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
mk_RetLoc_simple(RLPri_Int) ));
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
return dst;
}
/* Handle misc other ops. */
if (e->Iex.Binop.op == Iop_Max32U) {
HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg dst = newVRegI(env);
HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(src1, dst));
addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
return dst;
}
if (e->Iex.Binop.op == Iop_DivModS64to32
|| e->Iex.Binop.op == Iop_DivModU64to32) {
/* 64 x 32 -> (32(rem),32(div)) division */
/* Get the 64-bit operand into edx:eax, and the other into
any old R/M. */
HReg rax = hregAMD64_RAX();
HReg rdx = hregAMD64_RDX();
HReg dst = newVRegI(env);
Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
/* Compute the left operand into a reg, and then
put the top half in edx and the bottom in eax. */
HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
addInstr(env, mk_iMOVsd_RR(left64, rdx));
addInstr(env, mk_iMOVsd_RR(left64, rax));
addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
addInstr(env, mk_iMOVsd_RR(rax, dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
return dst;
}
if (e->Iex.Binop.op == Iop_32HLto64) {
HReg hi32 = newVRegI(env);
HReg lo32 = newVRegI(env);
HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
addInstr(env, AMD64Instr_Alu64R(
Aalu_OR, AMD64RMI_Reg(lo32), hi32));
return hi32;
}
if (e->Iex.Binop.op == Iop_16HLto32) {
HReg hi16 = newVRegI(env);
HReg lo16 = newVRegI(env);
HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
addInstr(env, AMD64Instr_Alu64R(
Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
addInstr(env, AMD64Instr_Alu64R(
Aalu_OR, AMD64RMI_Reg(lo16), hi16));
return hi16;
}
if (e->Iex.Binop.op == Iop_8HLto16) {
HReg hi8 = newVRegI(env);
HReg lo8 = newVRegI(env);
HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
addInstr(env, AMD64Instr_Alu64R(
Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
addInstr(env, AMD64Instr_Alu64R(
Aalu_OR, AMD64RMI_Reg(lo8), hi8));
return hi8;
}
if (e->Iex.Binop.op == Iop_MullS32
|| e->Iex.Binop.op == Iop_MullS16
|| e->Iex.Binop.op == Iop_MullS8
|| e->Iex.Binop.op == Iop_MullU32
|| e->Iex.Binop.op == Iop_MullU16
|| e->Iex.Binop.op == Iop_MullU8) {
HReg a32 = newVRegI(env);
HReg b32 = newVRegI(env);
HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
Int shift = 0;
AMD64ShiftOp shr_op = Ash_SHR;
switch (e->Iex.Binop.op) {
case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
case Iop_MullS8: shr_op = Ash_SAR; shift = 56; break;
case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
case Iop_MullU8: shr_op = Ash_SHR; shift = 56; break;
default: vassert(0);
}
addInstr(env, mk_iMOVsd_RR(a32s, a32));
addInstr(env, mk_iMOVsd_RR(b32s, b32));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
return b32;
}
if (e->Iex.Binop.op == Iop_CmpF64) {
HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegI(env);
addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
/* Mask out irrelevant parts of the result so as to conform
to the CmpF64 definition. */
addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
return dst;
}
if (e->Iex.Binop.op == Iop_F64toI32S
|| e->Iex.Binop.op == Iop_F64toI64S) {
Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegI(env);
set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
set_SSE_rounding_default(env);
return dst;
}
break;
}
/* --------- UNARY OP --------- */
case Iex_Unop: {
/* 1Uto8(64to1(expr64)) */
{
DEFINE_PATTERN( p_1Uto8_64to1,
unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
IRExpr* expr64 = mi.bindee[0];
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, expr64);
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
AMD64RMI_Imm(1), dst));
return dst;
}
}
/* 8Uto64(LDle(expr64)) */
{
DEFINE_PATTERN(p_LDle8_then_8Uto64,
unop(Iop_8Uto64,
IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
HReg dst = newVRegI(env);
AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
return dst;
}
}
/* 16Uto64(LDle(expr64)) */
{
DEFINE_PATTERN(p_LDle16_then_16Uto64,
unop(Iop_16Uto64,
IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
HReg dst = newVRegI(env);
AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
return dst;
}
}
/* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
Use 32 bit arithmetic and let the default zero-extend rule
do the 32Uto64 for free. */
if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
IROp opi = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
AMD64AluOp aluOp = Aalu_INVALID;
switch (opi) {
case Iop_Add32: aluOp = Aalu_ADD; break;
case Iop_Sub32: aluOp = Aalu_SUB; break;
case Iop_And32: aluOp = Aalu_AND; break;
case Iop_Or32: aluOp = Aalu_OR; break;
case Iop_Xor32: aluOp = Aalu_XOR; break;
default: break;
}
if (aluOp != Aalu_INVALID) {
/* For commutative ops we assume any literal values are on
the second operand. */
HReg dst = newVRegI(env);
HReg reg = iselIntExpr_R(env, argL);
AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
addInstr(env, mk_iMOVsd_RR(reg,dst));
addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
return dst;
}
/* just fall through to normal handling for Iop_32Uto64 */
}
/* Fallback cases */
switch (e->Iex.Unop.op) {
case Iop_32Uto64:
case Iop_32Sto64: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
src, dst) );
return dst;
}
case Iop_128HIto64: {
HReg rHi, rLo;
iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
return rHi; /* and abandon rLo */
}
case Iop_128to64: {
HReg rHi, rLo;
iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
return rLo; /* and abandon rHi */
}
case Iop_8Uto16:
case Iop_8Uto32:
case Iop_8Uto64:
case Iop_16Uto64:
case Iop_16Uto32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
|| e->Iex.Unop.op==Iop_16Uto64 );
UInt mask = srcIs16 ? 0xFFFF : 0xFF;
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
AMD64RMI_Imm(mask), dst));
return dst;
}
case Iop_8Sto16:
case Iop_8Sto64:
case Iop_8Sto32:
case Iop_16Sto32:
case Iop_16Sto64: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
|| e->Iex.Unop.op==Iop_16Sto64 );
UInt amt = srcIs16 ? 48 : 56;
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
return dst;
}
case Iop_Not8:
case Iop_Not16:
case Iop_Not32:
case Iop_Not64: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
return dst;
}
case Iop_16HIto8:
case Iop_32HIto16:
case Iop_64HIto32: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
Int shift = 0;
switch (e->Iex.Unop.op) {
case Iop_16HIto8: shift = 8; break;
case Iop_32HIto16: shift = 16; break;
case Iop_64HIto32: shift = 32; break;
default: vassert(0);
}
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
return dst;
}
case Iop_1Uto64:
case Iop_1Uto32:
case Iop_1Uto8: {
HReg dst = newVRegI(env);
AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Set64(cond,dst));
return dst;
}
case Iop_1Sto8:
case Iop_1Sto16:
case Iop_1Sto32:
case Iop_1Sto64: {
/* could do better than this, but for now ... */
HReg dst = newVRegI(env);
AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Set64(cond,dst));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
return dst;
}
case Iop_Ctz64: {
/* Count trailing zeroes, implemented by amd64 'bsfq' */
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
return dst;
}
case Iop_Clz64: {
/* Count leading zeroes. Do 'bsrq' to establish the index
of the highest set bit, and subtract that value from
63. */
HReg tmp = newVRegI(env);
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
AMD64RMI_Imm(63), dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
AMD64RMI_Reg(tmp), dst));
return dst;
}
case Iop_CmpwNEZ64: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src,dst));
addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
AMD64RMI_Reg(src), dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
return dst;
}
case Iop_CmpwNEZ32: {
HReg src = newVRegI(env);
HReg dst = newVRegI(env);
HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(pre,src));
addInstr(env, AMD64Instr_MovxLQ(False, src, src));
addInstr(env, mk_iMOVsd_RR(src,dst));
addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
AMD64RMI_Reg(src), dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
return dst;
}
case Iop_Left8:
case Iop_Left16:
case Iop_Left32:
case Iop_Left64: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src, dst));
addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
return dst;
}
case Iop_V128to32: {
HReg dst = newVRegI(env);
HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
return dst;
}
/* V128{HI}to64 */
case Iop_V128HIto64:
case Iop_V128to64: {
HReg dst = newVRegI(env);
Int off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
HReg rsp = hregAMD64_RSP();
HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
16, vec, m16_rsp));
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
AMD64RMI_Mem(off_rsp), dst ));
return dst;
}
case Iop_V256to64_0: case Iop_V256to64_1:
case Iop_V256to64_2: case Iop_V256to64_3: {
HReg vHi, vLo, vec;
iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
/* Do the first part of the selection by deciding which of
the 128 bit registers do look at, and second part using
the same scheme as for V128{HI}to64 above. */
Int off = 0;
switch (e->Iex.Unop.op) {
case Iop_V256to64_0: vec = vLo; off = -16; break;
case Iop_V256to64_1: vec = vLo; off = -8; break;
case Iop_V256to64_2: vec = vHi; off = -16; break;
case Iop_V256to64_3: vec = vHi; off = -8; break;
default: vassert(0);
}
HReg dst = newVRegI(env);
HReg rsp = hregAMD64_RSP();
AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
16, vec, m16_rsp));
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
AMD64RMI_Mem(off_rsp), dst ));
return dst;
}
/* ReinterpF64asI64(e) */
/* Given an IEEE754 double, produce an I64 with the same bit
pattern. */
case Iop_ReinterpF64asI64: {
AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
HReg dst = newVRegI(env);
HReg src = iselDblExpr(env, e->Iex.Unop.arg);
/* paranoia */
set_SSE_rounding_default(env);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
addInstr(env, AMD64Instr_Alu64R(
Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
return dst;
}
/* ReinterpF32asI32(e) */
/* Given an IEEE754 single, produce an I64 with the same bit
pattern in the lower half. */
case Iop_ReinterpF32asI32: {
AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
HReg dst = newVRegI(env);
HReg src = iselFltExpr(env, e->Iex.Unop.arg);
/* paranoia */
set_SSE_rounding_default(env);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
return dst;
}
case Iop_16to8:
case Iop_32to8:
case Iop_64to8:
case Iop_32to16:
case Iop_64to16:
case Iop_64to32:
/* These are no-ops. */
return iselIntExpr_R(env, e->Iex.Unop.arg);
case Iop_GetMSBs8x8: {
/* Note: the following assumes the helper is of
signature
UInt fn ( ULong ), and is not a regparm fn.
*/
HReg dst = newVRegI(env);
HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
fn = (HWord)h_generic_calc_GetMSBs8x8;
addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
1, mk_RetLoc_simple(RLPri_Int) ));
/* MovxLQ is not exactly the right thing here. We just
need to get the bottom 8 bits of RAX into dst, and zero
out everything else. Assuming that the helper returns
a UInt with the top 24 bits zeroed out, it'll do,
though. */
addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
return dst;
}
case Iop_GetMSBs8x16: {
/* Note: the following assumes the helper is of signature
UInt fn ( ULong w64hi, ULong w64Lo ),
and is not a regparm fn. */
HReg dst = newVRegI(env);
HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
HReg rsp = hregAMD64_RSP();
fn = (HWord)h_generic_calc_GetMSBs8x16;
AMD64AMode* m8_rsp = AMD64AMode_IR( -8, rsp);
AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
16, vec, m16_rsp));
/* hi 64 bits into RDI -- the first arg */
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
AMD64RMI_Mem(m8_rsp),
hregAMD64_RDI() )); /* 1st arg */
/* lo 64 bits into RSI -- the 2nd arg */
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
AMD64RMI_Mem(m16_rsp),
hregAMD64_RSI() )); /* 2nd arg */
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
2, mk_RetLoc_simple(RLPri_Int) ));
/* MovxLQ is not exactly the right thing here. We just
need to get the bottom 16 bits of RAX into dst, and zero
out everything else. Assuming that the helper returns
a UInt with the top 16 bits zeroed out, it'll do,
though. */
addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
return dst;
}
default:
break;
}
/* Deal with unary 64-bit SIMD ops. */
switch (e->Iex.Unop.op) {
case Iop_CmpNEZ32x2:
fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
case Iop_CmpNEZ16x4:
fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
case Iop_CmpNEZ8x8:
fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
default:
fn = (HWord)0; break;
}
if (fn != (HWord)0) {
/* Note: the following assumes all helpers are of
signature
ULong fn ( ULong ), and they are
not marked as regparm functions.
*/
HReg dst = newVRegI(env);
HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
mk_RetLoc_simple(RLPri_Int) ));
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
return dst;
}
break;
}
/* --------- GET --------- */
case Iex_Get: {
if (ty == Ity_I64) {
HReg dst = newVRegI(env);
addInstr(env, AMD64Instr_Alu64R(
Aalu_MOV,
AMD64RMI_Mem(
AMD64AMode_IR(e->Iex.Get.offset,
hregAMD64_RBP())),
dst));
return dst;
}
if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
HReg dst = newVRegI(env);
addInstr(env, AMD64Instr_LoadEX(
toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
False,
AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
dst));
return dst;
}
break;
}
case Iex_GetI: {
AMD64AMode* am
= genGuestArrayOffset(
env, e->Iex.GetI.descr,
e->Iex.GetI.ix, e->Iex.GetI.bias );
HReg dst = newVRegI(env);
if (ty == Ity_I8) {
addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
return dst;
}
if (ty == Ity_I64) {
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
return dst;
}
break;
}
/* --------- CCALL --------- */
case Iex_CCall: {
HReg dst = newVRegI(env);
vassert(ty == e->Iex.CCall.retty);
/* be very restrictive for now. Only 64-bit ints allowed for
args, and 64 or 32 bits for return type. */
if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
goto irreducible;
/* Marshal args, do the call. */
UInt addToSp = 0;
RetLoc rloc = mk_RetLoc_INVALID();
doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
vassert(is_sane_RetLoc(rloc));
vassert(rloc.pri == RLPri_Int);
vassert(addToSp == 0);
/* Move to dst, and zero out the top 32 bits if the result type is
Ity_I32. Probably overkill, but still .. */
if (e->Iex.CCall.retty == Ity_I64)
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
else
addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
return dst;
}
/* --------- LITERAL --------- */
/* 64/32/16/8-bit literals */
case Iex_Const:
if (ty == Ity_I64) {
HReg r = newVRegI(env);
addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
return r;
} else {
AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
HReg r = newVRegI(env);
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
return r;
}
/* --------- MULTIPLEX --------- */
case Iex_ITE: { // VFD
if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
&& typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
HReg dst = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(r1,dst));
AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
return dst;
}
break;
}
/* --------- TERNARY OP --------- */
case Iex_Triop: {
IRTriop *triop = e->Iex.Triop.details;
/* C3210 flags following FPU partial remainder (fprem), both
IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
if (triop->op == Iop_PRemC3210F64
|| triop->op == Iop_PRem1C3210F64) {
AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
HReg arg1 = iselDblExpr(env, triop->arg2);
HReg arg2 = iselDblExpr(env, triop->arg3);
HReg dst = newVRegI(env);
addInstr(env, AMD64Instr_A87Free(2));
/* one arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* other arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
switch (triop->op) {
case Iop_PRemC3210F64:
addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
break;
case Iop_PRem1C3210F64:
addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
break;
default:
vassert(0);
}
/* Ignore the result, and instead make off with the FPU's
C3210 flags (in the status word). */
addInstr(env, AMD64Instr_A87StSW(m8_rsp));
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
return dst;
}
break;
}
default:
break;
} /* switch (e->tag) */
/* We get here if no pattern matched. */
irreducible:
ppIRExpr(e);
vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries ---*/
/*---------------------------------------------------------*/
/* --------------------- AMODEs --------------------- */
/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/
/* Public entry point: compute an addressing mode for 'e'.  All the
   real work is done by iselIntExpr_AMode_wrk; this wrapper only
   sanity-checks the result before handing it back. */
static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   AMD64AMode* result = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(result));
   return result;
}
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselIntExpr_AMode.  Tries to recognise address-shaped
   IR trees (base + index<<scale + displacement) and fold them into a
   single amd64 addressing mode; anything that doesn't match is
   computed into a register and used as a plain 0(reg) amode.  The
   match cases are ordered most-complex first, so the best encoding
   wins. */
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
MatchInfo mi;
DECLARE_PATTERN(p_complex);
IRType ty = typeOfIRExpr(env->type_env,e);
/* Addresses are always 64 bits wide on amd64. */
vassert(ty == Ity_I64);
/* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
/* bind0 bind1 bind2 bind3 */
DEFINE_PATTERN(p_complex,
binop( Iop_Add64,
binop( Iop_Add64,
bind(0),
binop(Iop_Shl64, bind(1), bind(2))
),
bind(3)
)
);
if (matchIRExpr(&mi, p_complex, e)) {
IRExpr* expr1 = mi.bindee[0];
IRExpr* expr2 = mi.bindee[1];
IRExpr* imm8 = mi.bindee[2];
IRExpr* simm32 = mi.bindee[3];
/* Only usable if the shift amount is 0..3 (hardware scale is
   limited to 1/2/4/8) and the displacement fits in a signed
   32-bit immediate. */
if (imm8->tag == Iex_Const
&& imm8->Iex.Const.con->tag == Ico_U8
&& imm8->Iex.Const.con->Ico.U8 < 4
/* imm8 is OK, now check simm32 */
&& simm32->tag == Iex_Const
&& simm32->Iex.Const.con->tag == Ico_U64
&& fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
UInt shift = imm8->Iex.Const.con->Ico.U8;
UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
HReg r1 = iselIntExpr_R(env, expr1);
HReg r2 = iselIntExpr_R(env, expr2);
vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
/* Full base+index*scale+disp form: offset(r1, r2, 1<<shift) */
return AMD64AMode_IRRS(offset, r1, r2, shift);
}
}
/* Add64(expr1, Shl64(expr2, imm)) */
if (e->tag == Iex_Binop
&& e->Iex.Binop.op == Iop_Add64
&& e->Iex.Binop.arg2->tag == Iex_Binop
&& e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
&& e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
&& e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
/* shift==0 is deliberately excluded: an unscaled add is handled
   just as well by the simpler cases below. */
if (shift == 1 || shift == 2 || shift == 3) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
/* 0(r1, r2, 1<<shift) */
return AMD64AMode_IRRS(0, r1, r2, shift);
}
}
/* Add64(expr,i) */
if (e->tag == Iex_Binop
&& e->Iex.Binop.op == Iop_Add64
&& e->Iex.Binop.arg2->tag == Iex_Const
&& e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
&& fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
/* disp(r1) */
return AMD64AMode_IR(
toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
r1
);
}
/* Doesn't match anything in particular. Generate it into
a register and use that. */
{
HReg r1 = iselIntExpr_R(env, e);
return AMD64AMode_IR(0, r1);
}
}
/* --------------------- RMIs --------------------- */
/* Similarly, calculate an expression into an AMD64RMI operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */
/* Public entry point: compute 'e' into a reg/mem/imm operand,
   sanity-checking whatever the worker produced. */
static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* Immediates need no checking. */
   if (rmi->tag == Armi_Imm)
      return rmi;
   /* A register must be a virtual 64-bit integer register. */
   if (rmi->tag == Armi_Reg) {
      vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
      vassert(hregIsVirtual(rmi->Armi.Reg.reg));
      return rmi;
   }
   /* A memory operand must carry a well-formed amode. */
   if (rmi->tag == Armi_Mem) {
      vassert(sane_AMode(rmi->Armi.Mem.am));
      return rmi;
   }
   vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
}
/* DO NOT CALL THIS DIRECTLY ! */
/* DO NOT CALL THIS DIRECTLY !  Worker for iselIntExpr_RMI: prefer an
   immediate, then a direct memory reference, and only then fall back
   to computing the expression into a register. */
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* Special case: immediate 64/32/16/8.  Narrow constants are
      masked to their width; a 64-bit constant is only usable as an
      immediate if it fits in (sign-extended) 32 bits. */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      switch (con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(con->Ico.U64))
               return AMD64RMI_Imm(toUInt(con->Ico.U64));
            break; /* too wide: fall through to the register case */
         case Ico_U32:
            return AMD64RMI_Imm(con->Ico.U32);
         case Ico_U16:
            return AMD64RMI_Imm(0xFFFF & con->Ico.U16);
         case Ico_U8:
            return AMD64RMI_Imm(0xFF & con->Ico.U8);
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
      }
   }

   /* Special case: 64-bit GET becomes a memory reference off the
      guest-state pointer (%rbp). */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                        hregAMD64_RBP()));
   }

   /* Special case: 64-bit little-endian load from memory. */
   if (e->tag == Iex_Load && ty == Ity_I64
       && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return AMD64RMI_Mem(am);
   }

   /* Default case: calculate into a register and return that. */
   return AMD64RMI_Reg(iselIntExpr_R(env, e));
}
/* --------------------- RIs --------------------- */
/* Calculate an expression into an AMD64RI operand. As with
iselIntExpr_R, the expression can have type 64, 32, 16 or 8
bits. */
/* Public entry point: compute 'e' into a reg-or-imm operand,
   sanity-checking whatever the worker produced. */
static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
   /* Immediates need no checking. */
   if (ri->tag == Ari_Imm)
      return ri;
   /* A register must be a virtual 64-bit integer register. */
   if (ri->tag == Ari_Reg) {
      vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
      vassert(hregIsVirtual(ri->Ari.Reg.reg));
      return ri;
   }
   vpanic("iselIntExpr_RI: unknown amd64 RI tag");
}
/* DO NOT CALL THIS DIRECTLY ! */
/* DO NOT CALL THIS DIRECTLY !  Worker for iselIntExpr_RI: prefer an
   immediate operand, otherwise compute the expression into a
   register.  Narrow constants are masked to their width; a 64-bit
   constant is only usable as an immediate if it fits in
   (sign-extended) 32 bits. */
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break; /* too wide: fall through to the register case */
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            /* BUGFIX: was "iselIntExpr_RMI.Iex_Const(amd64)", copied
               from the RMI worker; a panic here would misreport which
               function hit the bad constant tag. */
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}
/* --------------------- RMs --------------------- */
/* Similarly, calculate an expression into an AMD64RM operand. As
with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
bits. */
/* Public entry point: compute 'e' into a reg-or-mem operand,
   sanity-checking whatever the worker produced. */
static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
   /* A register must be a virtual 64-bit integer register. */
   if (rm->tag == Arm_Reg) {
      vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
      vassert(hregIsVirtual(rm->Arm.Reg.reg));
      return rm;
   }
   /* A memory operand must carry a well-formed amode. */
   if (rm->tag == Arm_Mem) {
      vassert(sane_AMode(rm->Arm.Mem.am));
      return rm;
   }
   vpanic("iselIntExpr_RM: unknown amd64 RM tag");
}
/* DO NOT CALL THIS DIRECTLY ! */
/* DO NOT CALL THIS DIRECTLY !  Worker for iselIntExpr_RM: a 64-bit
   GET can be referenced directly in memory off the guest-state
   pointer; everything else is computed into a register. */
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 64-bit GET -> memory operand off %rbp */
   if (e->tag == Iex_Get && ty == Ity_I64)
      return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                       hregAMD64_RBP()));

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   return AMD64RM_Reg(iselIntExpr_R(env, e));
}
/* --------------------- CONDCODE --------------------- */
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
/* Public entry point for condition-code selection.  A condition code
   carries no checkable structure, so — unlike the other iselIntExpr_*
   wrappers — there is nothing to sanity-check; just delegate. */
static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   return iselCondCode_wrk(env, e);
}
/* DO NOT CALL THIS DIRECTLY ! */
/* Worker for iselCondCode.  Emits code which evaluates the Ity_I1
   expression 'e' and leaves the host flags set; returns the AMD64
   condition code which holds (on those flags) exactly when 'e'
   evaluates to 1.  Falls through a sequence of pattern matches from
   most specific to most generic, returning at the first match. */
static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var: copy the temp, AND with 1 (which sets ZF from the result's
      low bit), and signal "true" as not-zero. */
   if (e->tag == Iex_RdTmp) {
      HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r64,dst));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
      return Acc_NZ;
   }

   /* Constant 1:Bit.  "xor r,r" unconditionally sets ZF, so returning
      Acc_Z yields an always-true condition and Acc_NZ an always-false
      one; pick whichever matches the constant's value. */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
   }

   /* Not1(...): generate code for the argument, then negate the test
      condition.  AMD64 condition codes come in even/odd complementary
      pairs, so XOR-ing the code with 1 inverts its sense. */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 64to1 --- */
   /* 64to1: only the low bit matters; test it directly. */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: 32to1 --- */
   /* 32to1: same as 64to1 -- test the low bit. */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */
   /* CmpNEZ8(x): test only the low 8 bits against zero. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */
   /* CmpNEZ16(x): test only the low 16 bits against zero. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFFFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */
   /* CmpNEZ32(x): 32-bit compare against immediate zero. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */
   /* CmpNEZ64(Or64(x,y)): OR the operands into a scratch register;
      the OR itself sets ZF, so no separate compare is needed. */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
         AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg tmp = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
         return Acc_NZ;
      }
   }

   /* CmpNEZ64(x): 64-bit compare against immediate zero. */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
   /* CmpEQ8 / CmpNE8.  With a zero second operand a single TEST of
      the low byte suffices; otherwise XOR the operands and mask to
      8 bits, so ZF is set iff the low bytes are equal. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, AMD64Instr_Test64(0xFF,r1));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg r = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16: same XOR-and-mask scheme, with a 16-bit
      mask. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg r = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX16");
      }
   }

   /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
      Saves a "movq %rax, %tmp" compared to the default route: the
      helper's return value is compared in %rax directly against the
      constant, which is materialised into a scratch register. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE64
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      HReg tmp = newVRegI(env);
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U64);
      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    cal->Iex.CCall.cee,
                    cal->Iex.CCall.retty, cal->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);
      /* Compare %rax (the call's integer result) against the
         constant.  Note the constant may not fit in 32 bits, hence
         the Imm64 into a register rather than an immediate operand. */
      addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
                                      AMD64RMI_Reg(hregAMD64_RAX()), tmp));
      return Acc_NZ;
   }

   /* Cmp*64*(x,y): a single 64-bit CMP, then map the IR comparison to
      the corresponding flag condition. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U
           || e->Iex.Binop.op == Iop_CasCmpEQ64
           || e->Iex.Binop.op == Iop_CasCmpNE64
           || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
         case Iop_CmpNE64:
         case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
         case Iop_CmpLT64S: return Acc_L;
         case Iop_CmpLT64U: return Acc_B;
         case Iop_CmpLE64S: return Acc_LE;
         case Iop_CmpLE64U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX64");
      }
   }

   /* Cmp*32*(x,y): as above, but with a 32-bit CMP. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32
           || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
         case Iop_CmpNE32:
         case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
         case Iop_CmpLT32S: return Acc_L;
         case Iop_CmpLT32U: return Acc_B;
         case Iop_CmpLE32S: return Acc_LE;
         case Iop_CmpLE32U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX32");
      }
   }

   /* No pattern matched: this IR is not handled. */
   ppIRExpr(e);
   vpanic("iselCondCode(amd64)");
}
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit) ---*/
/*---------------------------------------------------------*/
/* Compute a 128-bit value into a register pair, which is returned as
the first two parameters. As with iselIntExpr_R, these may be
either real or virtual regs; in any case they must not be changed
by subsequent code emitted by the caller. */
static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   /* Delegate to the worker, then check that both halves of the
      result are virtual 64-bit integer registers. */
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   /* Debugging aid: dump the expression just selected. */
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}
/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e )
{
vassert(e);
vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
/* read 128-bit IRTemp */
if (e->tag == Iex_RdTmp) {
lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
return;
}
/* --------- BINARY ops --------- */
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
/* 64 x 64 -> 128 multiply */
case Iop_MullU64:
case Iop_MullS64: {
/* get one operand into %rax, and the other into a R/M.
Need to make an educated guess about which is better in
which. */
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);