/*---------------------------------------------------------------*/ | |

/*--- begin libvex_ir.h ---*/ | |

/*---------------------------------------------------------------*/ | |

/* | |

This file is part of Valgrind, a dynamic binary instrumentation | |

framework. | |

Copyright (C) 2004-2013 OpenWorks LLP | |

info@open-works.net | |

This program is free software; you can redistribute it and/or | |

modify it under the terms of the GNU General Public License as | |

published by the Free Software Foundation; either version 2 of the | |

License, or (at your option) any later version. | |

This program is distributed in the hope that it will be useful, but | |

WITHOUT ANY WARRANTY; without even the implied warranty of | |

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |

General Public License for more details. | |

You should have received a copy of the GNU General Public License | |

along with this program; if not, write to the Free Software | |

Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |

02110-1301, USA. | |

The GNU General Public License is contained in the file COPYING. | |

Neither the names of the U.S. Department of Energy nor the | |

University of California nor the names of its contributors may be | |

used to endorse or promote products derived from this software | |

without prior written permission. | |

*/ | |

#ifndef __LIBVEX_IR_H | |

#define __LIBVEX_IR_H | |

#include "libvex_basictypes.h" | |

/*---------------------------------------------------------------*/ | |

/*--- High-level IR description ---*/ | |

/*---------------------------------------------------------------*/ | |

/* Vex IR is an architecture-neutral intermediate representation. | |

Unlike some IRs in systems similar to Vex, it is not like assembly | |

language (ie. a list of instructions). Rather, it is more like the | |

IR that might be used in a compiler. | |

Code blocks | |

~~~~~~~~~~~ | |

The code is broken into small code blocks ("superblocks", type: | |

'IRSB'). Each code block typically represents from 1 to perhaps 50 | |

instructions. IRSBs are single-entry, multiple-exit code blocks. | |

Each IRSB contains three things: | |

- a type environment, which indicates the type of each temporary | |

value present in the IRSB | |

- a list of statements, which represent code | |

- a jump that exits from the end the IRSB | |

Because the blocks are multiple-exit, there can be additional | |

conditional exit statements that cause control to leave the IRSB | |

before the final exit. Also because of this, IRSBs can cover | |

multiple non-consecutive sequences of code (up to 3). These are | |

recorded in the type VexGuestExtents (see libvex.h). | |

Statements and expressions | |

~~~~~~~~~~~~~~~~~~~~~~~~~~ | |

Statements (type 'IRStmt') represent operations with side-effects, | |

eg. guest register writes, stores, and assignments to temporaries. | |

Expressions (type 'IRExpr') represent operations without | |

side-effects, eg. arithmetic operations, loads, constants. | |

Expressions can contain sub-expressions, forming expression trees, | |

eg. (3 + (4 * load(addr1))).

Storage of guest state | |

~~~~~~~~~~~~~~~~~~~~~~ | |

The "guest state" contains the guest registers of the guest machine | |

(ie. the machine that we are simulating). It is stored by default | |

in a block of memory supplied by the user of the VEX library, | |

generally referred to as the guest state (area). To operate on | |

these registers, one must first read ("Get") them from the guest | |

state into a temporary value. Afterwards, one can write ("Put") | |

them back into the guest state. | |

Get and Put are characterised by a byte offset into the guest | |

state, a small integer which effectively gives the identity of the | |

referenced guest register, and a type, which indicates the size of | |

the value to be transferred. | |

The basic "Get" and "Put" operations are sufficient to model normal | |

fixed registers on the guest. Selected areas of the guest state | |

can be treated as a circular array of registers (type: | |

'IRRegArray'), which can be indexed at run-time. This is done with | |

the "GetI" and "PutI" primitives. This is necessary to describe | |

rotating register files, for example the x87 FPU stack, SPARC | |

register windows, and the Itanium register files. | |

Examples, and flattened vs. unflattened code | |

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |

For example, consider this x86 instruction: | |

addl %eax, %ebx | |

One Vex IR translation for this code would be this: | |

------ IMark(0x24F275, 7, 0) ------ | |

t3 = GET:I32(0) # get %eax, a 32-bit integer | |

t2 = GET:I32(12) # get %ebx, a 32-bit integer | |

t1 = Add32(t3,t2) # addl | |

PUT(0) = t1 # put %eax | |

(For simplicity, this ignores the effects on the condition codes, and | |

the update of the instruction pointer.) | |

The "IMark" is an IR statement that doesn't represent actual code. | |

Instead it indicates the address and length of the original | |

instruction. The numbers 0 and 12 are offsets into the guest state | |

for %eax and %ebx. The full list of offsets for an architecture | |

<ARCH> can be found in the type VexGuest<ARCH>State in the file | |

VEX/pub/libvex_guest_<ARCH>.h. | |

The five statements in this example are: | |

- the IMark | |

- three assignments to temporaries | |

- one register write (put) | |

The six expressions in this example are: | |

- two register reads (gets) | |

- one arithmetic (add) operation | |

- three temporaries (two nested within the Add32, one in the PUT) | |

The above IR is "flattened", ie. all sub-expressions are "atoms", | |

either constants or temporaries. An equivalent, unflattened version | |

would be: | |

PUT(0) = Add32(GET:I32(0), GET:I32(12)) | |

IR is guaranteed to be flattened at instrumentation-time. This makes | |

instrumentation easier. Equivalent flattened and unflattened IR | |

typically results in the same generated code. | |

Another example, this one showing loads and stores: | |

addl %edx,4(%eax) | |

This becomes (again ignoring condition code and instruction pointer | |

updates): | |

------ IMark(0x4000ABA, 3, 0) ------ | |

t3 = Add32(GET:I32(0),0x4:I32) | |

t2 = LDle:I32(t3) | |

t1 = GET:I32(8) | |

t0 = Add32(t2,t1) | |

STle(t3) = t0 | |

The "le" in "LDle" and "STle" is short for "little-endian". | |

No need for deallocations | |

~~~~~~~~~~~~~~~~~~~~~~~~~ | |

Although there are allocation functions for various data structures | |

in this file, there are no deallocation functions. This is because | |

Vex uses a memory allocation scheme that automatically reclaims the | |

memory used by allocated structures once translation is completed. | |

This makes things easier for tools that instrument/transform code

blocks. | |

SSAness and typing | |

~~~~~~~~~~~~~~~~~~ | |

The IR is fully typed. For every IRSB (IR block) it is possible to | |

say unambiguously whether or not it is correctly typed. | |

Incorrectly typed IR has no meaning and the VEX will refuse to | |

process it. At various points during processing VEX typechecks the | |

IR and aborts if any violations are found. This seems overkill but | |

makes it a great deal easier to build a reliable JIT. | |

IR also has the SSA property. SSA stands for Static Single | |

Assignment, and what it means is that each IR temporary may be | |

assigned to only once. This idea became widely used in compiler | |

construction in the mid to late 90s. It makes many IR-level | |

transformations/code improvements easier, simpler and faster. | |

Whenever it typechecks an IR block, VEX also checks the SSA | |

property holds, and will abort if not so. So SSAness is | |

mechanically and rigidly enforced. | |

*/ | |

/*---------------------------------------------------------------*/ | |

/*--- Type definitions for the IR ---*/ | |

/*---------------------------------------------------------------*/ | |

/* General comments about naming schemes: | |

All publically visible functions contain the name of the primary | |

type on which they operate (IRFoo, IRBar, etc). Hence you should | |

be able to identify these functions by grepping for "IR[A-Z]". | |

For some type 'IRFoo': | |

- ppIRFoo is the printing method for IRFoo, printing it to the | |

output channel specified in the LibVEX_Initialise call. | |

- eqIRFoo is a structural equality predicate for IRFoos. | |

- deepCopyIRFoo is a deep copy constructor for IRFoos. | |

It recursively traverses the entire argument tree and | |

produces a complete new tree. All types have a deep copy | |

constructor. | |

- shallowCopyIRFoo is the shallow copy constructor for IRFoos. | |

It creates a new top-level copy of the supplied object, | |

but does not copy any sub-objects. Only some types have a | |

shallow copy constructor. | |

*/ | |

/* ------------------ Types ------------------ */ | |

/* A type indicates the size of a value, and whether it's an integer, a | |

float, or a vector (SIMD) value. */ | |

typedef | |

enum { | |

Ity_INVALID=0x1100, | |

Ity_I1, | |

Ity_I8, | |

Ity_I16, | |

Ity_I32, | |

Ity_I64, | |

Ity_I128, /* 128-bit scalar */ | |

Ity_F32, /* IEEE 754 float */ | |

Ity_F64, /* IEEE 754 double */ | |

Ity_D32, /* 32-bit Decimal floating point */ | |

Ity_D64, /* 64-bit Decimal floating point */ | |

Ity_D128, /* 128-bit Decimal floating point */ | |

Ity_F128, /* 128-bit floating point; implementation defined */ | |

Ity_V128, /* 128-bit SIMD */ | |

Ity_V256 /* 256-bit SIMD */ | |

} | |

IRType; | |

/* Pretty-print an IRType */ | |

extern void ppIRType ( IRType ); | |

/* Get the size (in bytes) of an IRType */ | |

extern Int sizeofIRType ( IRType ); | |

/* Translate 1/2/4/8 into Ity_I{8,16,32,64} respectively. Asserts on | |

any other input. */ | |

extern IRType integerIRTypeOfSize ( Int szB ); | |

/* ------------------ Endianness ------------------ */ | |

/* IREndness is used in load IRExprs and store IRStmts. */ | |

typedef | |

enum { | |

Iend_LE=0x1200, /* little endian */ | |

Iend_BE /* big endian */ | |

} | |

IREndness; | |

/* ------------------ Constants ------------------ */ | |

/* IRConsts are used within 'Const' and 'Exit' IRExprs. */ | |

/* The various kinds of constant. */ | |

typedef | |

enum { | |

Ico_U1=0x1300, | |

Ico_U8, | |

Ico_U16, | |

Ico_U32, | |

Ico_U64, | |

Ico_F32, /* 32-bit IEEE754 floating */ | |

Ico_F32i, /* 32-bit unsigned int to be interpreted literally | |

as a IEEE754 single value. */ | |

Ico_F64, /* 64-bit IEEE754 floating */ | |

Ico_F64i, /* 64-bit unsigned int to be interpreted literally | |

as a IEEE754 double value. */ | |

Ico_V128, /* 128-bit restricted vector constant, with 1 bit | |

(repeated 8 times) for each of the 16 x 1-byte lanes */ | |

Ico_V256 /* 256-bit restricted vector constant, with 1 bit | |

(repeated 8 times) for each of the 32 x 1-byte lanes */ | |

} | |

IRConstTag; | |

/* A constant. Stored as a tagged union. 'tag' indicates what kind of | |

constant this is. 'Ico' is the union that holds the fields. If an | |

IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant, | |

and its value can be accessed with 'c.Ico.U32'. */ | |

typedef | |

struct _IRConst { | |

IRConstTag tag; | |

union { | |

Bool U1; | |

UChar U8; | |

UShort U16; | |

UInt U32; | |

ULong U64; | |

Float F32; | |

UInt F32i; | |

Double F64; | |

ULong F64i; | |

UShort V128; /* 16-bit value; see Ico_V128 comment above */ | |

UInt V256; /* 32-bit value; see Ico_V256 comment above */ | |

} Ico; | |

} | |

IRConst; | |

/* IRConst constructors */ | |

extern IRConst* IRConst_U1 ( Bool ); | |

extern IRConst* IRConst_U8 ( UChar ); | |

extern IRConst* IRConst_U16 ( UShort ); | |

extern IRConst* IRConst_U32 ( UInt ); | |

extern IRConst* IRConst_U64 ( ULong ); | |

extern IRConst* IRConst_F32 ( Float ); | |

extern IRConst* IRConst_F32i ( UInt ); | |

extern IRConst* IRConst_F64 ( Double ); | |

extern IRConst* IRConst_F64i ( ULong ); | |

extern IRConst* IRConst_V128 ( UShort ); | |

extern IRConst* IRConst_V256 ( UInt ); | |

/* Deep-copy an IRConst */ | |

extern IRConst* deepCopyIRConst ( const IRConst* ); | |

/* Pretty-print an IRConst */ | |

extern void ppIRConst ( const IRConst* ); | |

/* Compare two IRConsts for equality */ | |

extern Bool eqIRConst ( const IRConst*, const IRConst* ); | |

/* ------------------ Call targets ------------------ */ | |

/* Describes a helper function to call. The name part is purely for | |

pretty printing and not actually used. regparms=n tells the back | |

end that the callee has been declared | |

"__attribute__((regparm(n)))", although indirectly using the | |

VEX_REGPARM(n) macro. On some targets (x86) the back end will need | |

to construct a non-standard sequence to call a function declared | |

like this. | |

mcx_mask is a sop to Memcheck. It indicates which args should be | |

considered 'always defined' when lazily computing definedness of | |

the result. Bit 0 of mcx_mask corresponds to args[0], bit 1 to | |

args[1], etc. If a bit is set, the corresponding arg is excluded | |

(hence "x" in "mcx") from definedness checking. | |

*/ | |

typedef | |

struct { | |

Int regparms; | |

const HChar* name; | |

void* addr; | |

UInt mcx_mask; | |

} | |

IRCallee; | |

/* Create an IRCallee. */ | |

extern IRCallee* mkIRCallee ( Int regparms, const HChar* name, void* addr ); | |

/* Deep-copy an IRCallee. */ | |

extern IRCallee* deepCopyIRCallee ( const IRCallee* ); | |

/* Pretty-print an IRCallee. */ | |

extern void ppIRCallee ( const IRCallee* ); | |

/* ------------------ Guest state arrays ------------------ */ | |

/* This describes a section of the guest state that we want to | |

be able to index at run time, so as to be able to describe | |

indexed or rotating register files on the guest. */ | |

typedef | |

struct { | |

Int base; /* guest state offset of start of indexed area */ | |

IRType elemTy; /* type of each element in the indexed area */ | |

Int nElems; /* number of elements in the indexed area */ | |

} | |

IRRegArray; | |

extern IRRegArray* mkIRRegArray ( Int, IRType, Int ); | |

extern IRRegArray* deepCopyIRRegArray ( const IRRegArray* ); | |

extern void ppIRRegArray ( const IRRegArray* ); | |

extern Bool eqIRRegArray ( const IRRegArray*, const IRRegArray* ); | |

/* ------------------ Temporaries ------------------ */ | |

/* This represents a temporary, eg. t1. The IR optimiser relies on the | |

fact that IRTemps are 32-bit ints. Do not change them to be ints of | |

any other size. */ | |

typedef UInt IRTemp; | |

/* Pretty-print an IRTemp. */ | |

extern void ppIRTemp ( IRTemp ); | |

#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF) | |

/* --------------- Primops (arity 1,2,3 and 4) --------------- */ | |

/* Primitive operations that are used in Unop, Binop, Triop and Qop | |

IRExprs. Once we take into account integer, floating point and SIMD | |

operations of all the different sizes, there are quite a lot of them. | |

Most instructions supported by the architectures that Vex supports | |

(x86, PPC, etc) are represented. Some more obscure ones (eg. cpuid) | |

are not; they are instead handled with dirty helpers that emulate | |

their functionality. Such obscure ones are thus not directly visible | |

in the IR, but their effects on guest state (memory and registers) | |

are made visible via the annotations in IRDirty structures. | |

*/ | |

typedef | |

enum { | |

/* -- Do not change this ordering. The IR generators rely on | |

(eg) Iop_Add64 == IopAdd8 + 3. -- */ | |

Iop_INVALID=0x1400, | |

Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64, | |

Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64, | |

/* Signless mul. MullS/MullU is elsewhere. */ | |

Iop_Mul8, Iop_Mul16, Iop_Mul32, Iop_Mul64, | |

Iop_Or8, Iop_Or16, Iop_Or32, Iop_Or64, | |

Iop_And8, Iop_And16, Iop_And32, Iop_And64, | |

Iop_Xor8, Iop_Xor16, Iop_Xor32, Iop_Xor64, | |

Iop_Shl8, Iop_Shl16, Iop_Shl32, Iop_Shl64, | |

Iop_Shr8, Iop_Shr16, Iop_Shr32, Iop_Shr64, | |

Iop_Sar8, Iop_Sar16, Iop_Sar32, Iop_Sar64, | |

/* Integer comparisons. */ | |

Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32, Iop_CmpEQ64, | |

Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64, | |

/* Tags for unary ops */ | |

Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64, | |

/* Exactly like CmpEQ8/16/32/64, but carrying the additional | |

hint that these compute the success/failure of a CAS | |

operation, and hence are almost certainly applied to two | |

copies of the same value, which in turn has implications for | |

Memcheck's instrumentation. */ | |

Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64, | |

Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64, | |

/* Exactly like CmpNE8/16/32/64, but carrying the additional | |

hint that these needs expensive definedness tracking. */ | |

Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64, | |

/* -- Ordering not important after here. -- */ | |

/* Widening multiplies */ | |

Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64, | |

Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64, | |

/* Weirdo integer stuff */

Iop_Clz64, Iop_Clz32, /* count leading zeroes */ | |

Iop_Ctz64, Iop_Ctz32, /* count trailing zeros */ | |

/* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of | |

zero. You must ensure they are never given a zero argument. | |

*/ | |

/* Standard integer comparisons */ | |

Iop_CmpLT32S, Iop_CmpLT64S, | |

Iop_CmpLE32S, Iop_CmpLE64S, | |

Iop_CmpLT32U, Iop_CmpLT64U, | |

Iop_CmpLE32U, Iop_CmpLE64U, | |

/* As a sop to Valgrind-Memcheck, the following are useful. */ | |

Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64, | |

Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */

Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */ | |

Iop_Max32U, /* unsigned max */ | |

/* PowerPC-style 3-way integer comparisons. Without them it is | |

difficult to simulate PPC efficiently. | |

op(x,y) | x < y = 0x8 else | |

| x > y = 0x4 else | |

| x == y = 0x2 | |

*/ | |

Iop_CmpORD32U, Iop_CmpORD64U, | |

Iop_CmpORD32S, Iop_CmpORD64S, | |

/* Division */ | |

/* TODO: clarify semantics wrt rounding, negative values, whatever */ | |

Iop_DivU32, // :: I32,I32 -> I32 (simple div, no mod) | |

Iop_DivS32, // ditto, signed | |

Iop_DivU64, // :: I64,I64 -> I64 (simple div, no mod) | |

Iop_DivS64, // ditto, signed | |

Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) | |

// concat with 64 0's (low)) | |

Iop_DivS64E, // ditto, signed | |

Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) | |

// concat with 32 0's (low)) | |

Iop_DivS32E, // ditto, signed | |

Iop_DivModU64to32, // :: I64,I32 -> I64 | |

// of which lo half is div and hi half is mod | |

Iop_DivModS64to32, // ditto, signed | |

Iop_DivModU128to64, // :: V128,I64 -> V128 | |

// of which lo half is div and hi half is mod | |

Iop_DivModS128to64, // ditto, signed | |

Iop_DivModS64to64, // :: I64,I64 -> I128 | |

// of which lo half is div and hi half is mod | |

/* Integer conversions. Some of these are redundant (eg | |

Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but | |

having a complete set reduces the typical dynamic size of IR | |

and makes the instruction selectors easier to write. */ | |

/* Widening conversions */ | |

Iop_8Uto16, Iop_8Uto32, Iop_8Uto64, | |

Iop_16Uto32, Iop_16Uto64, | |

Iop_32Uto64, | |

Iop_8Sto16, Iop_8Sto32, Iop_8Sto64, | |

Iop_16Sto32, Iop_16Sto64, | |

Iop_32Sto64, | |

/* Narrowing conversions */ | |

Iop_64to8, Iop_32to8, Iop_64to16, | |

/* 8 <-> 16 bit conversions */ | |

Iop_16to8, // :: I16 -> I8, low half | |

Iop_16HIto8, // :: I16 -> I8, high half | |

Iop_8HLto16, // :: (I8,I8) -> I16 | |

/* 16 <-> 32 bit conversions */ | |

Iop_32to16, // :: I32 -> I16, low half | |

Iop_32HIto16, // :: I32 -> I16, high half | |

Iop_16HLto32, // :: (I16,I16) -> I32 | |

/* 32 <-> 64 bit conversions */ | |

Iop_64to32, // :: I64 -> I32, low half | |

Iop_64HIto32, // :: I64 -> I32, high half | |

Iop_32HLto64, // :: (I32,I32) -> I64 | |

/* 64 <-> 128 bit conversions */ | |

Iop_128to64, // :: I128 -> I64, low half | |

Iop_128HIto64, // :: I128 -> I64, high half | |

Iop_64HLto128, // :: (I64,I64) -> I128 | |

/* 1-bit stuff */ | |

Iop_Not1, /* :: Ity_Bit -> Ity_Bit */ | |

Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */ | |

Iop_64to1, /* :: Ity_I64 -> Ity_Bit, just select bit[0] */ | |

Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */ | |

Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */ | |

Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */ | |

Iop_1Sto8, /* :: Ity_Bit -> Ity_I8, signed widen */ | |

Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */ | |

Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */ | |

Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */ | |

/* ------ Floating point. We try to be IEEE754 compliant. ------ */ | |

/* --- Simple stuff as mandated by 754. --- */ | |

/* Binary operations, with rounding. */ | |

/* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ | |

Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, | |

/* :: IRRoundingMode(I32) x F32 x F32 -> F32 */ | |

Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, | |

/* Variants of the above which produce a 64-bit result but which | |

round their result to a IEEE float range first. */ | |

/* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ | |

Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32, | |

/* Unary operations, without rounding. */ | |

/* :: F64 -> F64 */ | |

Iop_NegF64, Iop_AbsF64, | |

/* :: F32 -> F32 */ | |

Iop_NegF32, Iop_AbsF32, | |

/* Unary operations, with rounding. */ | |

/* :: IRRoundingMode(I32) x F64 -> F64 */ | |

Iop_SqrtF64, | |

/* :: IRRoundingMode(I32) x F32 -> F32 */ | |

Iop_SqrtF32, | |

/* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following: | |

0x45 Unordered | |

0x01 LT | |

0x00 GT | |

0x40 EQ | |

This just happens to be the Intel encoding. The values | |

are recorded in the type IRCmpF64Result. | |

*/ | |

/* :: F64 x F64 -> IRCmpF64Result(I32) */ | |

Iop_CmpF64, | |

Iop_CmpF32, | |

Iop_CmpF128, | |

/* --- Int to/from FP conversions. --- */ | |

/* For the most part, these take a first argument :: Ity_I32 (as | |

IRRoundingMode) which is an indication of the rounding mode | |

to use, as per the following encoding ("the standard | |

encoding"): | |

00b to nearest (the default) | |

01b to -infinity | |

10b to +infinity | |

11b to zero | |

This just happens to be the Intel encoding. For reference only, | |

the PPC encoding is: | |

00b to nearest (the default) | |

01b to zero | |

10b to +infinity | |

11b to -infinity | |

Any PPC -> IR front end will have to translate these PPC | |

encodings, as encoded in the guest state, to the standard | |

encodings, to pass to the primops. | |

For reference only, the ARM VFP encoding is: | |

00b to nearest | |

01b to +infinity | |

10b to -infinity | |

11b to zero | |

Again, this will have to be converted to the standard encoding | |

to pass to primops. | |

If one of these conversions gets an out-of-range condition, | |

or a NaN, as an argument, the result is host-defined. On x86 | |

the "integer indefinite" value 0x80..00 is produced. On PPC | |

it is either 0x80..00 or 0x7F..FF depending on the sign of | |

the argument. | |

On ARMvfp, when converting to a signed integer result, the | |

overflow result is 0x80..00 for negative args and 0x7F..FF | |

for positive args. For unsigned integer results it is | |

0x00..00 and 0xFF..FF respectively. | |

Rounding is required whenever the destination type cannot | |

represent exactly all values of the source type. | |

*/ | |

Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */ | |

Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */ | |

Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */ | |

Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */ | |

Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */ | |

Iop_I32StoF64, /* signed I32 -> F64 */ | |

Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */ | |

Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */ | |

Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */ | |

Iop_I32UtoF32, /* IRRoundingMode(I32) x unsigned I32 -> F32 */ | |

Iop_I32UtoF64, /* unsigned I32 -> F64 */ | |

Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */ | |

Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */ | |

Iop_F32toI32U, /* IRRoundingMode(I32) x F32 -> unsigned I32 */ | |

Iop_F32toI64U, /* IRRoundingMode(I32) x F32 -> unsigned I64 */ | |

Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */ | |

Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */ | |

/* Conversion between floating point formats */ | |

Iop_F32toF64, /* F32 -> F64 */ | |

Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */ | |

/* Reinterpretation. Take an F64 and produce an I64 with | |

the same bit pattern, or vice versa. */ | |

Iop_ReinterpF64asI64, Iop_ReinterpI64asF64, | |

Iop_ReinterpF32asI32, Iop_ReinterpI32asF32, | |

/* Support for 128-bit floating point */ | |

Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */ | |

Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */ | |

Iop_F128LOtoF64,/* F128 -> low half of F128 into a F64 register */ | |

/* :: IRRoundingMode(I32) x F128 x F128 -> F128 */ | |

Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128, | |

/* :: F128 -> F128 */ | |

Iop_NegF128, Iop_AbsF128, | |

/* :: IRRoundingMode(I32) x F128 -> F128 */ | |

Iop_SqrtF128, | |

Iop_I32StoF128, /* signed I32 -> F128 */ | |

Iop_I64StoF128, /* signed I64 -> F128 */ | |

Iop_I32UtoF128, /* unsigned I32 -> F128 */ | |

Iop_I64UtoF128, /* unsigned I64 -> F128 */ | |

Iop_F32toF128, /* F32 -> F128 */ | |

Iop_F64toF128, /* F64 -> F128 */ | |

Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */ | |

Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */ | |

Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32 */ | |

Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64 */ | |

Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */ | |

Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */ | |

/* --- guest x86/amd64 specifics, not mandated by 754. --- */ | |

/* Binary ops, with rounding. */ | |

/* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ | |

Iop_AtanF64, /* FPATAN, arctan(arg1/arg2) */ | |

Iop_Yl2xF64, /* FYL2X, arg1 * log2(arg2) */ | |

Iop_Yl2xp1F64, /* FYL2XP1, arg1 * log2(arg2+1.0) */ | |

Iop_PRemF64, /* FPREM, non-IEEE remainder(arg1/arg2) */ | |

Iop_PRemC3210F64, /* C3210 flags resulting from FPREM, :: I32 */ | |

Iop_PRem1F64, /* FPREM1, IEEE remainder(arg1/arg2) */ | |

Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */ | |

Iop_ScaleF64, /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */ | |

/* Note that on x86 guest, PRem1{C3210} has the same behaviour | |

as the IEEE mandated RemF64, except it is limited in the | |

range of its operand. Hence the partialness. */ | |

/* Unary ops, with rounding. */ | |

/* :: IRRoundingMode(I32) x F64 -> F64 */ | |

Iop_SinF64, /* FSIN */ | |

Iop_CosF64, /* FCOS */ | |

Iop_TanF64, /* FTAN */ | |

Iop_2xm1F64, /* (2^arg - 1.0) */ | |

Iop_RoundF64toInt, /* F64 value to nearest integral value (still | |

as F64) */ | |

Iop_RoundF32toInt, /* F32 value to nearest integral value (still | |

as F32) */ | |

/* --- guest s390 specifics, not mandated by 754. --- */ | |

/* Fused multiply-add/sub */ | |

/* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32 | |

(computes arg2 * arg3 +/- arg4) */ | |

Iop_MAddF32, Iop_MSubF32, | |

/* --- guest ppc32/64 specifics, not mandated by 754. --- */ | |

/* Ternary operations, with rounding. */ | |

/* Fused multiply-add/sub, with 112-bit intermediate | |

precision for ppc. | |

Also used to implement fused multiply-add/sub for s390. */ | |

/* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 | |

(computes arg2 * arg3 +/- arg4) */ | |

Iop_MAddF64, Iop_MSubF64, | |

/* Variants of the above which produce a 64-bit result but which | |

round their result to a IEEE float range first. */ | |

/* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */ | |

Iop_MAddF64r32, Iop_MSubF64r32, | |

/* :: F64 -> F64 */ | |

Iop_RSqrtEst5GoodF64, /* reciprocal square root estimate, 5 good bits */ | |

Iop_RoundF64toF64_NEAREST, /* frin */ | |

Iop_RoundF64toF64_NegINF, /* frim */ | |

Iop_RoundF64toF64_PosINF, /* frip */ | |

Iop_RoundF64toF64_ZERO, /* friz */ | |

/* :: F64 -> F32 */ | |

Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */ | |

/* :: IRRoundingMode(I32) x F64 -> F64 */ | |

Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */ | |

/* NB: pretty much the same as Iop_F64toF32, except no change | |

of type. */ | |

/* ------------------ 32-bit SIMD Integer ------------------ */ | |

/* 32x1 saturating add/sub (ok, well, not really SIMD :) */ | |

Iop_QAdd32S, | |

Iop_QSub32S, | |

/* 16x2 add/sub, also signed/unsigned saturating variants */ | |

Iop_Add16x2, Iop_Sub16x2, | |

Iop_QAdd16Sx2, Iop_QAdd16Ux2, | |

Iop_QSub16Sx2, Iop_QSub16Ux2, | |

/* 16x2 signed/unsigned halving add/sub. For each lane, these | |

compute bits 16:1 of (eg) sx(argL) + sx(argR), | |

or zx(argL) - zx(argR) etc. */ | |

Iop_HAdd16Ux2, Iop_HAdd16Sx2, | |

Iop_HSub16Ux2, Iop_HSub16Sx2, | |

/* 8x4 add/sub, also signed/unsigned saturating variants */ | |

Iop_Add8x4, Iop_Sub8x4, | |

Iop_QAdd8Sx4, Iop_QAdd8Ux4, | |

Iop_QSub8Sx4, Iop_QSub8Ux4, | |

/* 8x4 signed/unsigned halving add/sub. For each lane, these | |

compute bits 8:1 of (eg) sx(argL) + sx(argR), | |

or zx(argL) - zx(argR) etc. */ | |

Iop_HAdd8Ux4, Iop_HAdd8Sx4, | |

Iop_HSub8Ux4, Iop_HSub8Sx4, | |

/* 8x4 sum of absolute unsigned differences. */ | |

Iop_Sad8Ux4, | |

/* MISC (vector integer cmp != 0) */ | |

Iop_CmpNEZ16x2, Iop_CmpNEZ8x4, | |

/* ------------------ 64-bit SIMD FP ------------------------ */ | |

/* Convertion to/from int */ | |

Iop_I32UtoFx2, Iop_I32StoFx2, /* I32x4 -> F32x4 */ | |

Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ, /* F32x4 -> I32x4 */ | |

/* Fixed32 format is floating-point number with fixed number of fraction | |

bits. The number of fraction bits is passed as a second argument of | |

type I8. */ | |

Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */ | |

Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */ | |

/* Binary operations */ | |

Iop_Max32Fx2, Iop_Min32Fx2, | |

/* Pairwise Min and Max. See integer pairwise operations for more | |

details. */ | |

Iop_PwMax32Fx2, Iop_PwMin32Fx2, | |

/* Note: For the following compares, the arm front-end assumes a | |

nan in a lane of either argument returns zero for that lane. */ | |

Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2, | |

/* Vector Reciprocal Estimate finds an approximate reciprocal of each | |

element in the operand vector, and places the results in the destination | |

vector. */ | |

Iop_RecipEst32Fx2, | |

/* Vector Reciprocal Step computes (2.0 - arg1 * arg2). | |

Note, that if one of the arguments is zero and another one is infinity | |

of arbitrary sign the result of the operation is 2.0. */ | |

Iop_RecipStep32Fx2, | |

/* Vector Reciprocal Square Root Estimate finds an approximate reciprocal | |

square root of each element in the operand vector. */ | |

Iop_RSqrtEst32Fx2, | |

/* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. | |

Note that if one of the arguments is zero and the other one is infinity | |

of arbitrary sign the result of the operation is 1.5. */ | |

Iop_RSqrtStep32Fx2, | |

/* Unary */ | |

Iop_Neg32Fx2, Iop_Abs32Fx2, | |

/* ------------------ 64-bit SIMD Integer. ------------------ */ | |

/* MISC (vector integer cmp != 0) */ | |

Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2, | |

/* ADDITION (normal / unsigned sat / signed sat) */ | |

Iop_Add8x8, Iop_Add16x4, Iop_Add32x2, | |

Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1, | |

Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1, | |

/* PAIRWISE operations */ | |

/* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = | |

[Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ | |

Iop_PwAdd8x8, Iop_PwAdd16x4, Iop_PwAdd32x2, | |

Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2, | |

Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2, | |

Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2, | |

Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2, | |

/* The lengthening variant is unary. The resulting vector contains | |

half as many elements as the operand, but they are twice as wide. | |

Example: | |

Iop_PAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] | |

where a+b and c+d are unsigned 32-bit values. */ | |

Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2, | |

Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2, | |

/* SUBTRACTION (normal / unsigned sat / signed sat) */ | |

Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2, | |

Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1, | |

Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1, | |

/* ABSOLUTE VALUE */ | |

Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2, | |

/* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */ | |

Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2, | |

Iop_Mul32Fx2, | |

Iop_MulHi16Ux4, | |

Iop_MulHi16Sx4, | |

/* Polynomial multiplication treats its arguments as coefficients of | |

polynomials over {0, 1}. */ | |

Iop_PolynomialMul8x8, | |

/* Vector Saturating Doubling Multiply Returning High Half and | |

Vector Saturating Rounding Doubling Multiply Returning High Half */ | |

/* These IROp's multiply corresponding elements in two vectors, double | |

the results, and place the most significant half of the final results | |

in the destination vector. The results are truncated or rounded. If | |

any of the results overflow, they are saturated. */ | |

Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2, | |

Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2, | |

/* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ | |

Iop_Avg8Ux8, | |

Iop_Avg16Ux4, | |

/* MIN/MAX */ | |

Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2, | |

Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2, | |

Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2, | |

Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2, | |

/* COMPARISON */ | |

Iop_CmpEQ8x8, Iop_CmpEQ16x4, Iop_CmpEQ32x2, | |

Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2, | |

Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2, | |

/* COUNT ones / leading zeroes / leading sign bits (not including topmost | |

bit) */ | |

Iop_Cnt8x8, | |

Iop_Clz8x8, Iop_Clz16x4, Iop_Clz32x2, | |

Iop_Cls8x8, Iop_Cls16x4, Iop_Cls32x2, | |

Iop_Clz64x2, | |

/* VECTOR x VECTOR SHIFT / ROTATE */ | |

Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2, | |

Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2, | |

Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2, | |

Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1, | |

/* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ | |

Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2, | |

Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2, | |

Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2, | |

/* VECTOR x VECTOR SATURATING SHIFT */ | |

Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1, | |

Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1, | |

/* VECTOR x INTEGER SATURATING SHIFT */ | |

Iop_QShlNsatSU8x8, Iop_QShlNsatSU16x4, | |

Iop_QShlNsatSU32x2, Iop_QShlNsatSU64x1, | |

Iop_QShlNsatUU8x8, Iop_QShlNsatUU16x4, | |

Iop_QShlNsatUU32x2, Iop_QShlNsatUU64x1, | |

Iop_QShlNsatSS8x8, Iop_QShlNsatSS16x4, | |

Iop_QShlNsatSS32x2, Iop_QShlNsatSS64x1, | |

/* NARROWING (binary) | |

-- narrow 2xI64 into 1xI64, hi half from left arg */ | |

/* For saturated narrowing, I believe there are 4 variants of | |

the basic arithmetic operation, depending on the signedness | |

of argument and result. Here are examples that exemplify | |

what I mean: | |

QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255; | |

return x[7:0]; | |

QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128; | |

if (x >s 127) x = 127; | |

return x[7:0]; | |

QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127; | |

return x[7:0]; | |

QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0; | |

if (x >s 255) x = 255; | |

return x[7:0]; | |

*/ | |

Iop_QNarrowBin16Sto8Ux8, | |

Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4, | |

Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4, | |

/* INTERLEAVING */ | |

/* Interleave lanes from low or high halves of | |

operands. Most-significant result lane is from the left | |

arg. */ | |

Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2, | |

Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2, | |

/* Interleave odd/even lanes of operands. Most-significant result lane | |

is from the left arg. Note that Interleave{Odd,Even}Lanes32x2 are | |

identical to Interleave{HI,LO}32x2 and so are omitted.*/ | |

Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8, | |

Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4, | |

/* CONCATENATION -- build a new value by concatenating either | |

the even or odd lanes of both operands. Note that | |

Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2 | |

and so are omitted. */ | |

Iop_CatOddLanes8x8, Iop_CatOddLanes16x4, | |

Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4, | |

/* GET / SET elements of VECTOR | |

GET is binop (I64, I8) -> I<elem_size> | |

SET is triop (I64, I8, I<elem_size>) -> I64 */ | |

/* Note: the arm back-end handles only constant second argument */ | |

Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2, | |

Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2, | |

/* DUPLICATING -- copy value to all lanes */ | |

Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2, | |

/* SLICE -- produces the lowest 64 bits of (arg1:arg2) >> (8 * arg3). | |

arg3 is a shift amount in bytes and may be between 0 and 8 | |

inclusive. When 0, the result is arg2; when 8, the result is arg1. | |

Not all back ends handle all values. The arm32 and arm64 back | |

ends handle only immediate arg3 values. */ | |

Iop_Slice64, // (I64, I64, I8) -> I64 | |

/* REVERSE the order of chunks in vector lanes. Chunks must be | |

smaller than the vector lanes (obviously) and so may be 8-, | |

16- and 32-bit in size. */ | |

/* Examples: | |

Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g] | |

Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] | |

Reverse8sIn64_x1([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */ | |

Iop_Reverse8sIn16_x4, | |

Iop_Reverse8sIn32_x2, Iop_Reverse16sIn32_x2, | |

Iop_Reverse8sIn64_x1, Iop_Reverse16sIn64_x1, Iop_Reverse32sIn64_x1, | |

/* PERMUTING -- copy src bytes to dst, | |

as indexed by control vector bytes: | |

for i in 0 .. 7 . result[i] = argL[ argR[i] ] | |

argR[i] values may only be in the range 0 .. 7, else behaviour | |

is undefined. */ | |

Iop_Perm8x8, | |

/* MISC CONVERSION -- get high bits of each byte lane, a la | |

x86/amd64 pmovmskb */ | |

Iop_GetMSBs8x8, /* I64 -> I8 */ | |

/* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate | |

See floating-point equivalents for details. */ | |

Iop_RecipEst32Ux2, Iop_RSqrtEst32Ux2, | |

/* ------------------ Decimal Floating Point ------------------ */ | |

/* ARITHMETIC INSTRUCTIONS 64-bit | |

---------------------------------- | |

IRRoundingMode(I32) X D64 X D64 -> D64 | |

*/ | |

Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64, | |

/* ARITHMETIC INSTRUCTIONS 128-bit | |

---------------------------------- | |

IRRoundingMode(I32) X D128 X D128 -> D128 | |

*/ | |

Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128, | |

/* SHIFT SIGNIFICAND INSTRUCTIONS | |

* The DFP significand is shifted by the number of digits specified | |

* by the U8 operand. Digits shifted out of the leftmost digit are | |

* lost. Zeros are supplied to the vacated positions on the right. | |

* The sign of the result is the same as the sign of the original | |

* operand. | |

* | |

* D64 x U8 -> D64 left shift and right shift respectively */ | |

Iop_ShlD64, Iop_ShrD64, | |

/* D128 x U8 -> D128 left shift and right shift respectively */ | |

Iop_ShlD128, Iop_ShrD128, | |

/* FORMAT CONVERSION INSTRUCTIONS | |

* D32 -> D64 | |

*/ | |

Iop_D32toD64, | |

/* D64 -> D128 */ | |

Iop_D64toD128, | |

/* I32S -> D128 */ | |

Iop_I32StoD128, | |

/* I32U -> D128 */ | |

Iop_I32UtoD128, | |

/* I64S -> D128 */ | |

Iop_I64StoD128, | |

/* I64U -> D128 */ | |

Iop_I64UtoD128, | |

/* IRRoundingMode(I32) x D64 -> D32 */ | |

Iop_D64toD32, | |

/* IRRoundingMode(I32) x D128 -> D64 */ | |

Iop_D128toD64, | |

/* I32S -> D64 */ | |

Iop_I32StoD64, | |

/* I32U -> D64 */ | |

Iop_I32UtoD64, | |

/* IRRoundingMode(I32) x I64 -> D64 */ | |

Iop_I64StoD64, | |

/* IRRoundingMode(I32) x I64 -> D64 */ | |

Iop_I64UtoD64, | |

/* IRRoundingMode(I32) x D64 -> I32 */ | |

Iop_D64toI32S, | |

/* IRRoundingMode(I32) x D64 -> I32 */ | |

Iop_D64toI32U, | |

/* IRRoundingMode(I32) x D64 -> I64 */ | |

Iop_D64toI64S, | |

/* IRRoundingMode(I32) x D64 -> I64 */ | |

Iop_D64toI64U, | |

/* IRRoundingMode(I32) x D128 -> I32 */ | |

Iop_D128toI32S, | |

/* IRRoundingMode(I32) x D128 -> I32 */ | |

Iop_D128toI32U, | |

/* IRRoundingMode(I32) x D128 -> I64 */ | |

Iop_D128toI64S, | |

/* IRRoundingMode(I32) x D128 -> I64 */ | |

Iop_D128toI64U, | |

/* IRRoundingMode(I32) x F32 -> D32 */ | |

Iop_F32toD32, | |

/* IRRoundingMode(I32) x F32 -> D64 */ | |

Iop_F32toD64, | |

/* IRRoundingMode(I32) x F32 -> D128 */ | |

Iop_F32toD128, | |

/* IRRoundingMode(I32) x F64 -> D32 */ | |

Iop_F64toD32, | |

/* IRRoundingMode(I32) x F64 -> D64 */ | |

Iop_F64toD64, | |

/* IRRoundingMode(I32) x F64 -> D128 */ | |

Iop_F64toD128, | |

/* IRRoundingMode(I32) x F128 -> D32 */ | |

Iop_F128toD32, | |

/* IRRoundingMode(I32) x F128 -> D64 */ | |

Iop_F128toD64, | |

/* IRRoundingMode(I32) x F128 -> D128 */ | |

Iop_F128toD128, | |

/* IRRoundingMode(I32) x D32 -> F32 */ | |

Iop_D32toF32, | |

/* IRRoundingMode(I32) x D32 -> F64 */ | |

Iop_D32toF64, | |

/* IRRoundingMode(I32) x D32 -> F128 */ | |

Iop_D32toF128, | |

/* IRRoundingMode(I32) x D64 -> F32 */ | |

Iop_D64toF32, | |

/* IRRoundingMode(I32) x D64 -> F64 */ | |

Iop_D64toF64, | |

/* IRRoundingMode(I32) x D64 -> F128 */ | |

Iop_D64toF128, | |

/* IRRoundingMode(I32) x D128 -> F32 */ | |

Iop_D128toF32, | |

/* IRRoundingMode(I32) x D128 -> F64 */ | |

Iop_D128toF64, | |

/* IRRoundingMode(I32) x D128 -> F128 */ | |

Iop_D128toF128, | |

/* ROUNDING INSTRUCTIONS | |

* IRRoundingMode(I32) x D64 -> D64 | |

* The D64 operand, if a finite number, is rounded to a | |

* floating point integer value, i.e. no fractional part. | |

*/ | |

Iop_RoundD64toInt, | |

/* IRRoundingMode(I32) x D128 -> D128 */ | |

Iop_RoundD128toInt, | |

/* COMPARE INSTRUCTIONS | |

* D64 x D64 -> IRCmpD64Result(I32) */ | |

Iop_CmpD64, | |

/* D128 x D128 -> IRCmpD128Result(I32) */ | |

Iop_CmpD128, | |

/* COMPARE BIASED EXPONENT INSTRUCTIONS | |

* D64 x D64 -> IRCmpD64Result(I32) */ | |

Iop_CmpExpD64, | |

/* D128 x D128 -> IRCmpD128Result(I32) */ | |

Iop_CmpExpD128, | |

/* QUANTIZE AND ROUND INSTRUCTIONS | |

* The source operand is converted and rounded to the form with the | |

* immediate exponent specified by the rounding and exponent parameter. | |

* | |

* The second operand is converted and rounded to the form | |

* of the first operand's exponent and then rounded based on the specified | |

* rounding mode parameter. | |

* | |

* IRRoundingMode(I32) x D64 x D64-> D64 */ | |

Iop_QuantizeD64, | |

/* IRRoundingMode(I32) x D128 x D128 -> D128 */ | |

Iop_QuantizeD128, | |

/* IRRoundingMode(I32) x I8 x D64 -> D64 | |

* The Decimal Floating point operand is rounded to the requested | |

* significance given by the I8 operand as specified by the rounding | |

* mode. | |

*/ | |

Iop_SignificanceRoundD64, | |

/* IRRoundingMode(I32) x I8 x D128 -> D128 */ | |

Iop_SignificanceRoundD128, | |

/* EXTRACT AND INSERT INSTRUCTIONS | |

* D64 -> I64 | |

* The exponent of the D32 or D64 operand is extracted. The | |

* extracted exponent is converted to a 64-bit signed binary integer. | |

*/ | |

Iop_ExtractExpD64, | |

/* D128 -> I64 */ | |

Iop_ExtractExpD128, | |

/* D64 -> I64 | |

* The number of significand digits of the D64 operand is extracted. | |

* The number is stored as a 64-bit signed binary integer. | |

*/ | |

Iop_ExtractSigD64, | |

/* D128 -> I64 */ | |

Iop_ExtractSigD128, | |

/* I64 x D64 -> D64 | |

* The exponent is specified by the first I64 operand the signed | |

* significand is given by the second I64 value. The result is a D64 | |

* value consisting of the specified significand and exponent whose | |

* sign is that of the specified significand. | |

*/ | |

Iop_InsertExpD64, | |

/* I64 x D128 -> D128 */ | |

Iop_InsertExpD128, | |

/* Support for 128-bit DFP type */ | |

Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64, | |

/* I64 -> I64 | |

* Convert 50-bit densely packed BCD string to 60 bit BCD string | |

*/ | |

Iop_DPBtoBCD, | |

/* I64 -> I64 | |

* Convert 60 bit BCD string to 50-bit densely packed BCD string | |

*/ | |

Iop_BCDtoDPB, | |

/* BCD arithmetic instructions, (V128, V128) -> V128 | |

* The BCD format is the same as that used in the BCD<->DPB conversion | |

* routines, except using 124 digits (vs 60) plus the trailing 4-bit | |

* signed code. */ | |

Iop_BCDAdd, Iop_BCDSub, | |

/* Conversion I64 -> D64 */ | |

Iop_ReinterpI64asD64, | |

/* Conversion D64 -> I64 */ | |

Iop_ReinterpD64asI64, | |

/* ------------------ 128-bit SIMD FP. ------------------ */ | |

/* --- 32x4 vector FP --- */ | |

/* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ | |

Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, | |

/* binary */ | |

Iop_Max32Fx4, Iop_Min32Fx4, | |

Iop_Add32Fx2, Iop_Sub32Fx2, | |

/* Note: For the following compares, the ppc and arm front-ends assume a | |

nan in a lane of either argument returns zero for that lane. */ | |

Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4, | |

Iop_CmpGT32Fx4, Iop_CmpGE32Fx4, | |

/* Pairwise Max and Min. See integer pairwise operations for details. */ | |

Iop_PwMax32Fx4, Iop_PwMin32Fx4, | |

/* unary */ | |

Iop_Abs32Fx4, | |

Iop_Sqrt32Fx4, | |

Iop_Neg32Fx4, | |

/* Vector Reciprocal Estimate finds an approximate reciprocal of each | |

element in the operand vector, and places the results in the destination | |

vector. */ | |

Iop_RecipEst32Fx4, | |

/* Vector Reciprocal Step computes (2.0 - arg1 * arg2). | |

Note, that if one of the arguments is zero and another one is infinity | |

of arbitrary sign the result of the operation is 2.0. */ | |

Iop_RecipStep32Fx4, | |

/* Vector Reciprocal Square Root Estimate finds an approximate reciprocal | |

square root of each element in the operand vector. */ | |

Iop_RSqrtEst32Fx4, | |

/* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. | |

Note that if one of the arguments is zero and the other one is infinity | |

of arbitrary sign the result of the operation is 1.5. */ | |

Iop_RSqrtStep32Fx4, | |

/* --- Int to/from FP conversion --- */ | |

/* Unlike the standard fp conversions, these irops take no | |

rounding mode argument. Instead the irop trailers _R{M,P,N,Z} | |

indicate the mode: {-inf, +inf, nearest, zero} respectively. */ | |

Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */ | |

Iop_FtoI32Ux4_RZ, Iop_FtoI32Sx4_RZ, /* F32x4 -> I32x4 */ | |

Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (saturating) */ | |

Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */ | |

Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */ | |

/* Fixed32 format is floating-point number with fixed number of fraction | |

bits. The number of fraction bits is passed as a second argument of | |

type I8. */ | |

Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */ | |

Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */ | |

/* --- Single to/from half conversion --- */ | |

/* FIXME: what kind of rounding in F32x4 -> F16x4 case? */ | |

Iop_F32toF16x4, Iop_F16toF32x4, /* F32x4 <-> F16x4 */ | |

/* --- 32x4 lowest-lane-only scalar FP --- */ | |

/* In binary cases, upper 3/4 is copied from first operand. In | |

unary cases, upper 3/4 is copied from the operand. */ | |

/* binary */ | |

Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4, | |

Iop_Max32F0x4, Iop_Min32F0x4, | |

Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4, | |

/* unary */ | |

Iop_RecipEst32F0x4, Iop_Sqrt32F0x4, Iop_RSqrtEst32F0x4, | |

/* --- 64x2 vector FP --- */ | |

/* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ | |

Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, | |

/* binary */ | |

Iop_Max64Fx2, Iop_Min64Fx2, | |

Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2, | |

/* unary */ | |

Iop_Abs64Fx2, | |

Iop_Sqrt64Fx2, | |

Iop_Neg64Fx2, | |

/* --- 64x2 lowest-lane-only scalar FP --- */ | |

/* In binary cases, upper half is copied from first operand. In | |

unary cases, upper half is copied from the operand. */ | |

/* binary */ | |

Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2, | |

Iop_Max64F0x2, Iop_Min64F0x2, | |

Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2, | |

/* unary */ | |

Iop_Sqrt64F0x2, | |

/* --- pack / unpack --- */ | |

/* 64 <-> 128 bit vector */ | |

Iop_V128to64, // :: V128 -> I64, low half | |

Iop_V128HIto64, // :: V128 -> I64, high half | |

Iop_64HLtoV128, // :: (I64,I64) -> V128 | |

Iop_64UtoV128, | |

Iop_SetV128lo64, | |

/* Copies lower 64/32/16/8 bits, zeroes out the rest. */ | |

Iop_ZeroHI64ofV128, // :: V128 -> V128 | |

Iop_ZeroHI96ofV128, // :: V128 -> V128 | |

Iop_ZeroHI112ofV128, // :: V128 -> V128 | |

Iop_ZeroHI120ofV128, // :: V128 -> V128 | |

/* 32 <-> 128 bit vector */ | |

Iop_32UtoV128, | |

Iop_V128to32, // :: V128 -> I32, lowest lane | |

Iop_SetV128lo32, // :: (V128,I32) -> V128 | |

/* ------------------ 128-bit SIMD Integer. ------------------ */ | |

/* BITWISE OPS */ | |

Iop_NotV128, | |

Iop_AndV128, Iop_OrV128, Iop_XorV128, | |

/* VECTOR SHIFT (shift amt :: Ity_I8) */ | |

Iop_ShlV128, Iop_ShrV128, | |

/* MISC (vector integer cmp != 0) */ | |

Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2, | |

/* ADDITION (normal / U->U sat / S->S sat) */ | |

Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, | |

Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2, | |

Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2, | |

/* ADDITION, ARM64 specific saturating variants. */ | |

/* Unsigned widen left arg, signed widen right arg, add, saturate S->S. | |

This corresponds to SUQADD. */ | |

Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8, | |

Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2, | |

/* Signed widen left arg, unsigned widen right arg, add, saturate U->U. | |

This corresponds to USQADD. */ | |

Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8, | |

Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2, | |

/* SUBTRACTION (normal / unsigned sat / signed sat) */ | |

Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, | |

Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2, | |

Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2, | |

/* MULTIPLICATION (normal / high half of signed/unsigned) */ | |

Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, | |

Iop_MulHi16Ux8, Iop_MulHi32Ux4, | |

Iop_MulHi16Sx8, Iop_MulHi32Sx4, | |

/* (widening signed/unsigned of even lanes, with lowest lane=zero) */ | |

Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4, | |

Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4, | |

/* Widening multiplies, all of the form (I64, I64) -> V128 */ | |

Iop_Mull8Ux8, Iop_Mull8Sx8, | |

Iop_Mull16Ux4, Iop_Mull16Sx4, | |

Iop_Mull32Ux2, Iop_Mull32Sx2, | |

/* Signed doubling saturating widening multiplies, (I64, I64) -> V128 */ | |

Iop_QDMull16Sx4, Iop_QDMull32Sx2, | |

/* Vector Saturating Doubling Multiply Returning High Half and | |

Vector Saturating Rounding Doubling Multiply Returning High Half. | |

These IROps multiply corresponding elements in two vectors, double | |

the results, and place the most significant half of the final results | |

in the destination vector. The results are truncated or rounded. If | |

any of the results overflow, they are saturated. To be more precise, | |

for each lane, the computed result is: | |

QDMulHi: | |

hi-half( sign-extend(laneL) *q sign-extend(laneR) *q 2 ) | |

QRDMulHi: | |

hi-half( sign-extend(laneL) *q sign-extend(laneR) *q 2 | |

+q (1 << (lane-width-in-bits - 1)) ) | |

*/ | |

Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, /* (V128, V128) -> V128 */ | |

Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, /* (V128, V128) -> V128 */ | |

/* Polynomial multiplication treats its arguments as | |

coefficients of polynomials over {0, 1}. */ | |

Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */ | |

Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */ | |

/* Vector Polynomial multiplication add. (V128, V128) -> V128 | |

*** Below is the algorithm for the instructions. These Iops could | |

be emulated to get this functionality, but the emulation would | |

be long and messy. | |

Example for polynomial multiply add for vector of bytes | |

do i = 0 to 15 | |

prod[i].bit[0:14] <- 0 | |

srcA <- VR[argL].byte[i] | |

srcB <- VR[argR].byte[i] | |

do j = 0 to 7 | |

do k = 0 to j | |

gbit <- srcA.bit[k] & srcB.bit[j-k] | |

prod[i].bit[j] <- prod[i].bit[j] ^ gbit | |

end | |

end | |

do j = 8 to 14 | |

do k = j-7 to 7 | |

gbit <- (srcA.bit[k] & srcB.bit[j-k]) | |

prod[i].bit[j] <- prod[i].bit[j] ^ gbit | |

end | |

end | |

end | |

do i = 0 to 7 | |

VR[dst].hword[i] <- 0b0 || (prod[2×i] ^ prod[2×i+1]) | |

end | |

*/ | |

Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8, | |

Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2, | |

/* PAIRWISE operations */ | |

/* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = | |

[Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ | |

Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4, | |

Iop_PwAdd32Fx2, | |

/* The lengthening variant is unary. The resulting vector contains | |

half as many elements as the operand, but they are twice as wide. | |

Example: | |

Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] | |

where a+b and c+d are unsigned 32-bit values. */ | |

Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4, | |

Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4, | |

/* Other unary pairwise ops */ | |

/* Vector bit matrix transpose. (V128) -> V128 */ | |

/* For each doubleword element of the source vector, an 8-bit x 8-bit | |

* matrix transpose is performed. */ | |

Iop_PwBitMtxXpose64x2, | |

/* ABSOLUTE VALUE */ | |

Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2, | |

/* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ | |

Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, | |

Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, | |

/* MIN/MAX */ | |

Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2, | |

Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2, | |

Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2, | |

Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2, | |

/* COMPARISON */ | |

Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2, | |

Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2, | |

Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2, | |

/* COUNT ones / leading zeroes / leading sign bits (not including topmost | |

bit) */ | |

Iop_Cnt8x16, | |

Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4, | |

Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4, | |

/* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ | |

Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2, | |

Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2, | |

Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2, | |

/* VECTOR x VECTOR SHIFT / ROTATE */ | |

/* FIXME: I'm pretty sure the ARM32 front/back ends interpret these | |

differently from all other targets. The intention is that | |

the shift amount (2nd arg) is interpreted as unsigned and | |

only the lowest log2(lane-bits) bits are relevant. But the | |

ARM32 versions treat the shift amount as an 8 bit signed | |

number. The ARM32 uses should be replaced by the relevant | |

vector x vector bidirectional shifts instead. */ | |

Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2, | |

Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2, | |

Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, | |

Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2, | |

Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2, | |

/* VECTOR x VECTOR SATURATING SHIFT */ | |

Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2, | |

Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2, | |

/* VECTOR x INTEGER SATURATING SHIFT */ | |

Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8, | |

Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2, | |

Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8, | |

Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2, | |

Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8, | |

Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2, | |

/* VECTOR x VECTOR BIDIRECTIONAL SATURATING (& MAYBE ROUNDING) SHIFT */ | |

/* All of type (V128, V128) -> V256. */ | |

/* The least significant 8 bits of each lane of the second | |

operand are used as the shift amount, and interpreted signedly. | |

Positive values mean a shift left, negative a shift right. The | |

result is signedly or unsignedly saturated. There are also | |

rounding variants, which add 2^(shift_amount-1) to the value before | |

shifting, but only in the shift-right case. Vacated positions | |

are filled with zeroes. IOW, it's either SHR or SHL, but not SAR. | |

These operations return 129 bits: one bit ("Q") indicating whether | |

saturation occurred, and the shift result. The result type is V256, | |

of which the lower V128 is the shift result, and Q occupies the | |

least significant bit of the upper V128. All other bits of the | |

upper V128 are zero. */ | |

// Unsigned saturation, no rounding | |

Iop_QandUQsh8x16, Iop_QandUQsh16x8, | |

Iop_QandUQsh32x4, Iop_QandUQsh64x2, | |

// Signed saturation, no rounding | |

Iop_QandSQsh8x16, Iop_QandSQsh16x8, | |

Iop_QandSQsh32x4, Iop_QandSQsh64x2, | |

// Unsigned saturation, rounding | |

Iop_QandUQRsh8x16, Iop_QandUQRsh16x8, | |

Iop_QandUQRsh32x4, Iop_QandUQRsh64x2, | |

// Signed saturation, rounding | |

Iop_QandSQRsh8x16, Iop_QandSQRsh16x8, | |

Iop_QandSQRsh32x4, Iop_QandSQRsh64x2, | |

/* VECTOR x VECTOR BIDIRECTIONAL (& MAYBE ROUNDING) SHIFT */ | |

/* All of type (V128, V128) -> V128 */ | |

/* The least significant 8 bits of each lane of the second | |

operand are used as the shift amount, and interpreted signedly. | |

Positive values mean a shift left, negative a shift right. | |

There are also rounding variants, which add 2^(shift_amount-1) | |

to the value before shifting, but only in the shift-right case. | |

For left shifts, the vacated places are filled with zeroes. | |

For right shifts, the vacated places are filled with zeroes | |

for the U variants and sign bits for the S variants. */ | |

// Signed and unsigned, non-rounding | |

Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2, | |

Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2, | |

// Signed and unsigned, rounding | |

Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2, | |

Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2, | |

/* The least significant 8 bits of each lane of the second | |

operand are used as the shift amount, and interpreted signedly. | |

Positive values mean a shift left, negative a shift right. The | |

result is signedly or unsignedly saturated. There are also | |

rounding variants, which add 2^(shift_amount-1) to the value before | |

shifting, but only in the shift-right case. Vacated positions | |

are filled with zeroes. IOW, it's either SHR or SHL, but not SAR. | |

*/ | |

/* VECTOR x SCALAR SATURATING (& MAYBE ROUNDING) NARROWING SHIFT RIGHT */ | |

/* All of type (V128, I8) -> V128 */ | |

/* The first argument is shifted right, then narrowed to half the width | |

by saturating it. The second argument is a scalar shift amount that | |

applies to all lanes, and must be a value in the range 1 to lane_width. | |

The shift may be done signedly (Sar variants) or unsignedly (Shr | |

variants). The saturation is done according to the two signedness | |

indicators at the end of the name. For example 64Sto32U means a | |

signed 64 bit value is saturated into an unsigned 32 bit value. | |

Additionally, the QRS variants do rounding, that is, they add the | |

value (1 << (shift_amount-1)) to each source lane before shifting. | |

These operations return 65 bits: one bit ("Q") indicating whether | |

saturation occurred, and the shift result. The result type is V128, | |

of which the lower half is the shift result, and Q occupies the | |

least significant bit of the upper half. All other bits of the | |

upper half are zero. */ | |

// No rounding, sat U->U | |

Iop_QandQShrNnarrow16Uto8Ux8, | |

Iop_QandQShrNnarrow32Uto16Ux4, Iop_QandQShrNnarrow64Uto32Ux2, | |

// No rounding, sat S->S | |

Iop_QandQSarNnarrow16Sto8Sx8, | |

Iop_QandQSarNnarrow32Sto16Sx4, Iop_QandQSarNnarrow64Sto32Sx2, | |

// No rounding, sat S->U | |

Iop_QandQSarNnarrow16Sto8Ux8, | |

Iop_QandQSarNnarrow32Sto16Ux4, Iop_QandQSarNnarrow64Sto32Ux2, | |

// Rounding, sat U->U | |

Iop_QandQRShrNnarrow16Uto8Ux8, | |

Iop_QandQRShrNnarrow32Uto16Ux4, Iop_QandQRShrNnarrow64Uto32Ux2, | |

// Rounding, sat S->S | |

Iop_QandQRSarNnarrow16Sto8Sx8, | |

Iop_QandQRSarNnarrow32Sto16Sx4, Iop_QandQRSarNnarrow64Sto32Sx2, | |

// Rounding, sat S->U | |

Iop_QandQRSarNnarrow16Sto8Ux8, | |

Iop_QandQRSarNnarrow32Sto16Ux4, Iop_QandQRSarNnarrow64Sto32Ux2, | |

/* NARROWING (binary) | |

-- narrow 2xV128 into 1xV128, hi half from left arg */ | |

/* See comments above w.r.t. U vs S issues in saturated narrowing. */ | |

Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8, | |

Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, | |

Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, | |

Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, | |

Iop_QNarrowBin64Sto32Sx4, Iop_QNarrowBin64Uto32Ux4, | |

Iop_NarrowBin64to32x4, | |

/* NARROWING (unary) -- narrow V128 into I64 */ | |

Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2, | |

/* Saturating narrowing from signed source to signed/unsigned | |

destination */ | |

Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2, | |

Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2, | |

/* Saturating narrowing from unsigned source to unsigned destination */ | |

Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2, | |

/* WIDENING -- sign or zero extend each element of the argument | |

vector to the twice original size. The resulting vector consists of | |

the same number of elements but each element and the vector itself | |

are twice as wide. | |

All operations are I64->V128. | |

Example | |

Iop_Widen32Sto64x2( [a, b] ) = [c, d] | |

where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */ | |

Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2, | |

Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2, | |

/* INTERLEAVING */ | |

/* Interleave lanes from low or high halves of | |

operands. Most-significant result lane is from the left | |

arg. */ | |

Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, | |

Iop_InterleaveHI32x4, Iop_InterleaveHI64x2, | |

Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, | |

Iop_InterleaveLO32x4, Iop_InterleaveLO64x2, | |

/* Interleave odd/even lanes of operands. Most-significant result lane | |

is from the left arg. */ | |

Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16, | |

Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8, | |

Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4, | |

/* CONCATENATION -- build a new value by concatenating either | |

the even or odd lanes of both operands. Note that | |

Cat{Odd,Even}Lanes64x2 are identical to Interleave{HI,LO}64x2 | |

and so are omitted. */ | |

Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4, | |

Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4, | |

/* GET elements of VECTOR | |

GET is binop (V128, I8) -> I<elem_size> */ | |

/* Note: the arm back-end handles only constant second argument. */ | |

Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2, | |

/* DUPLICATING -- copy value to all lanes */ | |

Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4, | |

/* SLICE -- produces the lowest 128 bits of (arg1:arg2) >> (8 * arg3). | |

arg3 is a shift amount in bytes and may be between 0 and 16 | |

inclusive. When 0, the result is arg2; when 16, the result is arg1. | |

Not all back ends handle all values. The arm64 back | |

end handles only immediate arg3 values. */ | |

Iop_SliceV128, // (V128, V128, I8) -> V128 | |

/* REVERSE the order of chunks in vector lanes. Chunks must be | |

smaller than the vector lanes (obviously) and so may be 8-, | |

16- and 32-bit in size. See definitions of 64-bit SIMD | |

versions above for examples. */ | |

Iop_Reverse8sIn16_x8, | |

Iop_Reverse8sIn32_x4, Iop_Reverse16sIn32_x4, | |

Iop_Reverse8sIn64_x2, Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2, | |

Iop_Reverse1sIn8_x16, /* Reverse bits in each byte lane. */ | |

/* PERMUTING -- copy src bytes to dst, | |

as indexed by control vector bytes: | |

for i in 0 .. 15 . result[i] = argL[ argR[i] ] | |

argR[i] values may only be in the range 0 .. 15, else behaviour | |

is undefined. */ | |

Iop_Perm8x16, | |

Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */ | |

/* MISC CONVERSION -- get high bits of each byte lane, a la | |

x86/amd64 pmovmskb */ | |

Iop_GetMSBs8x16, /* V128 -> I16 */ | |

/* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate | |

See floating-point equivalents for details. */ | |

Iop_RecipEst32Ux4, Iop_RSqrtEst32Ux4, | |

/* ------------------ 256-bit SIMD Integer. ------------------ */ | |

/* Pack/unpack */ | |

Iop_V256to64_0, // V256 -> I64, extract least significant lane | |

Iop_V256to64_1, | |

Iop_V256to64_2, | |

Iop_V256to64_3, // V256 -> I64, extract most significant lane | |

Iop_64x4toV256, // (I64,I64,I64,I64)->V256 | |

// first arg is most significant lane | |

Iop_V256toV128_0, // V256 -> V128, less significant lane | |

Iop_V256toV128_1, // V256 -> V128, more significant lane | |

Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif | |

Iop_AndV256, | |

Iop_OrV256, | |

Iop_XorV256, | |

Iop_NotV256, | |

/* MISC (vector integer cmp != 0) */ | |

Iop_CmpNEZ8x32, Iop_CmpNEZ16x16, Iop_CmpNEZ32x8, Iop_CmpNEZ64x4, | |

Iop_Add8x32, Iop_Add16x16, Iop_Add32x8, Iop_Add64x4, | |

Iop_Sub8x32, Iop_Sub16x16, Iop_Sub32x8, Iop_Sub64x4, | |

Iop_CmpEQ8x32, Iop_CmpEQ16x16, Iop_CmpEQ32x8, Iop_CmpEQ64x4, | |

Iop_CmpGT8Sx32, Iop_CmpGT16Sx16, Iop_CmpGT32Sx8, Iop_CmpGT64Sx4, | |

Iop_ShlN16x16, Iop_ShlN32x8, Iop_ShlN64x4, | |

Iop_ShrN16x16, Iop_ShrN32x8, Iop_ShrN64x4, | |

Iop_SarN16x16, Iop_SarN32x8, | |

Iop_Max8Sx32, Iop_Max16Sx16, Iop_Max32Sx8, | |

Iop_Max8Ux32, Iop_Max16Ux16, Iop_Max32Ux8, | |

Iop_Min8Sx32, Iop_Min16Sx16, Iop_Min32Sx8, | |

Iop_Min8Ux32, Iop_Min16Ux16, Iop_Min32Ux8, | |

Iop_Mul16x16, Iop_Mul32x8, | |

Iop_MulHi16Ux16, Iop_MulHi16Sx16, | |

Iop_QAdd8Ux32, Iop_QAdd16Ux16, | |

Iop_QAdd8Sx32, Iop_QAdd16Sx16, | |

Iop_QSub8Ux32, Iop_QSub16Ux16, | |

Iop_QSub8Sx32, Iop_QSub16Sx16, | |

Iop_Avg8Ux32, Iop_Avg16Ux16, | |

Iop_Perm32x8, | |

/* (V128, V128) -> V128 */ | |

Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128, | |

Iop_NCipherV128, Iop_NCipherLV128, | |

/* Hash instructions, Federal Information Processing Standards | |

* Publication 180-3 Secure Hash Standard. */ | |

/* (V128, I8) -> V128; The I8 input arg is (ST | SIX), where ST and | |

* SIX are fields from the insn. See ISA 2.07 description of | |

* vshasigmad and vshasigmaw insns.*/ | |

Iop_SHA512, Iop_SHA256, | |

/* ------------------ 256-bit SIMD FP. ------------------ */ | |

/* ternary :: IRRoundingMode(I32) x V256 x V256 -> V256 */ | |

Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4, | |

Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8, | |

Iop_Sqrt32Fx8, | |

Iop_Sqrt64Fx4, | |

Iop_RSqrtEst32Fx8, | |

Iop_RecipEst32Fx8, | |

Iop_Max32Fx8, Iop_Min32Fx8, | |

Iop_Max64Fx4, Iop_Min64Fx4, | |

Iop_LAST /* must be the last enumerator */ | |

} | |

IROp; | |

/* Pretty-print an op. */ | |

extern void ppIROp ( IROp ); | |

/* Encoding of IEEE754-specified rounding modes.
   Note, various front and back ends rely on the actual numerical
   values of these, so do not change them.
   NOTE(review): the first four (0..3) match the common hardware
   rounding-control encoding (x87/SSE MXCSR RC field) — presumably
   the reason the values are frozen; confirm before relying on it. */
typedef
   enum {
      Irrm_NEAREST              = 0,  // Round to nearest, ties to even
      Irrm_NegINF               = 1,  // Round to negative infinity
      Irrm_PosINF               = 2,  // Round to positive infinity
      Irrm_ZERO                 = 3,  // Round toward zero
      Irrm_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
      Irrm_PREPARE_SHORTER      = 5,  // Round to prepare for shorter 
                                      // precision
      Irrm_AWAY_FROM_ZERO       = 6,  // Round to away from 0
      Irrm_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
   }
   IRRoundingMode;

/* Binary floating point comparison result values.
   This is also derived from what IA32 does.  The values correspond to
   the x87 condition-code bits delivered by FCOM (C0 = 0x01, C2 = 0x04,
   C3 = 0x40); an unordered comparison (NaN operand) sets all three,
   hence 0x45. */
typedef
   enum {
      Ircr_UN = 0x45,  /* unordered (at least one operand is a NaN) */
      Ircr_LT = 0x01,  /* less than */
      Ircr_GT = 0x00,  /* greater than */
      Ircr_EQ = 0x40   /* equal */
   }
   IRCmpFResult;

/* Width-specific aliases; all share the IRCmpFResult encoding. */
typedef IRCmpFResult IRCmpF32Result;
typedef IRCmpFResult IRCmpF64Result;
typedef IRCmpFResult IRCmpF128Result;

/* Decimal floating point result values.  Same encoding as the
   binary FP comparison results above. */
typedef IRCmpFResult IRCmpDResult;
typedef IRCmpDResult IRCmpD64Result;
typedef IRCmpDResult IRCmpD128Result;

/* ------------------ Expressions ------------------ */ | |

typedef struct _IRQop IRQop; /* forward declaration */ | |

typedef struct _IRTriop IRTriop; /* forward declaration */ | |

/* The different kinds of expressions.  Their meaning is explained below
   in the comments for IRExpr. */
typedef
   enum {
      Iex_Binder=0x1900, /* pattern matching inside Vex only */
      Iex_Get,     /* read guest register at a fixed offset */
      Iex_GetI,    /* read guest register at a non-fixed (indexed) offset */
      Iex_RdTmp,   /* value of an IR temporary */
      Iex_Qop,     /* quaternary operation */
      Iex_Triop,   /* ternary operation */
      Iex_Binop,   /* binary operation */
      Iex_Unop,    /* unary operation */
      Iex_Load,    /* load from memory (not load-linked; see IRStmt.LLSC) */
      Iex_Const,   /* constant-valued expression */
      Iex_ITE,     /* if-then-else; strict, both arms evaluated */
      Iex_CCall,   /* call to a pure helper C function */
      Iex_VECRET,  /* dirty-call arg lists only: vector return slot */
      Iex_BBPTR    /* dirty-call arg lists only: guest state pointer */
   }
   IRExprTag;

/* An expression.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Iex' is the union that holds the fields.  If
   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   expression, and the fields can be accessed with
   'e.Iex.Load.<fieldname>'.

   For each kind of expression, we show what it looks like when
   pretty-printed with ppIRExpr().
*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

            ppIRExpr output: GETI<descr>[<ix>,<bias>]
                         eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr*     ix;    /* Variable part of index into array */
         Int         bias;  /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;        /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
         IRQop* details;    /* out-of-line payload; see struct _IRQop */
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
         IRTriop* details;  /* out-of-line payload; see struct _IRTriop */
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp    op;        /* op-code   */
         IRExpr* arg1;      /* operand 1 */
         IRExpr* arg2;      /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp    op;        /* op-code */
         IRExpr* arg;       /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;     /* Endian-ness of the load */
         IRType    ty;      /* Type of the loaded value */
         IRExpr*   addr;    /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;      /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.

         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         * it must not assume that arguments are being evaluated in a
           particular order.  The order of evaluation is unspecified.

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         In principle it would be allowable to have the arg vector
         contain an IRExpr_VECRET(), although not IRExpr_BBPTR().  However,
         at the moment there is no requirement for clean helper calls to
         be able to return V128 or V256 values.  Hence this is not allowed.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;     /* Function to call. */
         IRType    retty;   /* Type of return value. */
         IRExpr**  args;    /* Vector of argument expressions. */
      } CCall;

      /* A ternary if-then-else operator.  It returns iftrue if cond is
         nonzero, iffalse otherwise.  Note that it is STRICT, ie. both
         iftrue and iffalse are evaluated in all cases.

         ppIRExpr output: ITE(<cond>,<iftrue>,<iffalse>),
                         eg. ITE(t6,t7,t8)
      */
      struct {
         IRExpr* cond;      /* Condition */
         IRExpr* iftrue;    /* True expression */
         IRExpr* iffalse;   /* False expression */
      } ITE;
   } Iex;
};

/* Expression auxiliaries: a ternary expression.  Held out-of-line and
   referenced from an IRExpr via the Iex.Triop.details pointer. */
struct _IRTriop {
   IROp op;          /* op-code   */
   IRExpr* arg1;     /* operand 1 */
   IRExpr* arg2;     /* operand 2 */
   IRExpr* arg3;     /* operand 3 */
};

/* Expression auxiliaries: a quaternary expression.  Held out-of-line
   and referenced from an IRExpr via the Iex.Qop.details pointer. */
struct _IRQop {
   IROp op;          /* op-code   */
   IRExpr* arg1;     /* operand 1 */
   IRExpr* arg2;     /* operand 2 */
   IRExpr* arg3;     /* operand 3 */
   IRExpr* arg4;     /* operand 4 */
};

/* Two special kinds of IRExpr, which can ONLY be used in | |

argument lists for dirty helper calls (IRDirty.args) and in NO | |

OTHER PLACES. And then only in very limited ways. */ | |

/* Denotes an argument which (in the helper) takes a pointer to a | |

(naturally aligned) V128 or V256, into which the helper is expected | |

to write its result. Use of IRExpr_VECRET() is strictly | |

controlled. If the helper returns a V128 or V256 value then | |

IRExpr_VECRET() must appear exactly once in the arg list, although | |

it can appear anywhere, and the helper must have a C 'void' return | |

type. If the helper returns any other type, IRExpr_VECRET() may | |

not appear in the argument list. */ | |

/* Denotes a void* argument which is passed to the helper, which at

run time will point to the thread's guest state area. This can | |

only appear at most once in an argument list, and it may not appear | |

at all in argument lists for clean helper calls. */ | |

static inline Bool is_IRExpr_VECRET_or_BBPTR ( const IRExpr* e ) { | |

return e->tag == Iex_VECRET || e->tag == Iex_BBPTR; | |

} | |

/* Expression constructors. */ | |

extern IRExpr* IRExpr_Binder ( Int binder ); | |

extern IRExpr* IRExpr_Get ( Int off, IRType ty ); | |

extern IRExpr* IRExpr_GetI ( IRRegArray* descr, IRExpr* ix, Int bias ); | |

extern IRExpr* IRExpr_RdTmp ( IRTemp tmp ); | |

extern IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2, | |

IRExpr* arg3, IRExpr* arg4 ); | |

extern IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1, | |

IRExpr* arg2, IRExpr* arg3 ); | |

extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 ); | |

extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg ); | |

extern IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr ); | |

extern IRExpr* IRExpr_Const ( IRConst* con ); | |

extern IRExpr* IRExpr_CCall ( IRCallee* cee, IRType retty, IRExpr** args ); | |

extern IRExpr* IRExpr_ITE ( IRExpr* cond, IRExpr* iftrue, IRExpr* iffalse ); | |

extern IRExpr* IRExpr_VECRET ( void ); | |

extern IRExpr* IRExpr_BBPTR ( void ); | |

/* Deep-copy an IRExpr. */ | |

extern IRExpr* deepCopyIRExpr ( const IRExpr* ); | |

/* Pretty-print an IRExpr. */ | |

extern void ppIRExpr ( const IRExpr* ); | |

/* NULL-terminated IRExpr vector constructors, suitable for | |

use as arg lists in clean/dirty helper calls. */ | |

extern IRExpr** mkIRExprVec_0 ( void ); | |

extern IRExpr** mkIRExprVec_1 ( IRExpr* ); | |

extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* ); | |

extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* ); | |

extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* ); | |

extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, | |

IRExpr* ); | |

extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, | |

IRExpr*, IRExpr* ); | |

extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, | |

IRExpr*, IRExpr*, IRExpr* ); | |

extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, | |

IRExpr*, IRExpr*, IRExpr*, IRExpr*); | |

/* IRExpr copiers: | |

- shallowCopy: shallow-copy (ie. create a new vector that shares the | |

elements with the original). | |

- deepCopy: deep-copy (ie. create a completely new vector). */ | |

extern IRExpr** shallowCopyIRExprVec ( IRExpr** ); | |

extern IRExpr** deepCopyIRExprVec ( IRExpr *const * ); | |

/* Make a constant expression from the given host word taking into | |

account (of course) the host word size. */ | |

extern IRExpr* mkIRExpr_HWord ( HWord ); | |

/* Convenience function for constructing clean helper calls. */ | |

extern | |

IRExpr* mkIRExprCCall ( IRType retty, | |

Int regparms, const HChar* name, void* addr, | |

IRExpr** args ); | |

/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp
 * or Iex_Const). */

static inline Bool isIRAtom ( const IRExpr* e ) { | |

return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const); | |

} | |

/* Are these two IR atoms identical? Causes an assertion | |

failure if they are passed non-atoms. */ | |

extern Bool eqIRAtom ( const IRExpr*, const IRExpr* ); | |

/* ------------------ Jump kinds ------------------ */ | |

/* This describes hints which can be passed to the dispatcher at guest | |

control-flow transfer points. | |

Re Ijk_InvalICache and Ijk_FlushDCache: the guest state _must_ have | |

two pseudo-registers, guest_CMSTART and guest_CMLEN, which specify | |

the start and length of the region to be invalidated. CM stands | |

for "Cache Management". These are both the size of a guest word. | |

It is the responsibility of the relevant toIR.c to ensure that | |

these are filled in with suitable values before issuing a jump of | |

kind Ijk_InvalICache or Ijk_FlushDCache. | |

Ijk_InvalICache requests invalidation of translations taken from | |

the requested range. Ijk_FlushDCache requests flushing of the D | |

cache for the specified range. | |

Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a | |

pseudo-register guest_EMNOTE, which is 32-bits regardless of the | |

host or guest word size. That register should be made to hold a | |

VexEmNote value to indicate the reason for the exit. | |

In the case of Ijk_EmFail, the exit is fatal (Vex-generated code | |

cannot continue) and so the jump destination can be anything. | |

Re Ijk_Sys_ (syscall jumps): the guest state must have a | |

pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest | |

word. Front ends should set this to be the IP at the most recently | |

executed kernel-entering (system call) instruction. This makes it | |

very much easier (viz, actually possible at all) to back up the | |

guest to restart a syscall that has been interrupted by a signal. | |

*/ | |

/* Kinds of control-flow transfer.  See the long comment above for the
   guest-state requirements of the InvalICache/FlushDCache, Em* and
   Sys_* kinds. */
typedef
   enum {
      Ijk_INVALID=0x1A00,
      Ijk_Boring,         /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_EmWarn,         /* report emulation warning before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* current instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_InvalICache,    /* Inval icache for range [CMSTART, +CMLEN) */
      Ijk_FlushDCache,    /* Flush dcache for range [CMSTART, +CMLEN) */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigILL,         /* current instruction synths SIGILL */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      Ijk_SigFPE_IntDiv,  /* current instruction synths SIGFPE - IntDiv */
      Ijk_SigFPE_IntOvf,  /* current instruction synths SIGFPE - IntOvf */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
   }
   IRJumpKind;

extern void ppIRJumpKind ( IRJumpKind ); | |

/* ------------------ Dirty helper calls ------------------ */ | |

/* A dirty call is a flexible mechanism for calling (possibly | |

conditionally) a helper function or procedure. The helper function | |

may read, write or modify client memory, and may read, write or | |

modify client state. It can take arguments and optionally return a | |

value. It may return different results and/or do different things | |

when called repeatedly with the same arguments, by means of storing | |

private state. | |

If a value is returned, it is assigned to the nominated return | |

temporary. | |

Dirty calls are statements rather than expressions for obvious | |

reasons. If a dirty call is marked as writing guest state, any | |

pre-existing values derived from the written parts of the guest | |

state are invalid. Similarly, if the dirty call is stated as | |

writing memory, any pre-existing loaded values are invalidated by | |

it. | |

In order that instrumentation is possible, the call must state, and | |

state correctly: | |

* Whether it reads, writes or modifies memory, and if so where. | |

* Whether it reads, writes or modifies guest state, and if so which | |

pieces. Several pieces may be stated, and their extents must be | |

known at translation-time. Each piece is allowed to repeat some | |

number of times at a fixed interval, if required. | |

Normally, code is generated to pass just the args to the helper. | |

However, if IRExpr_BBPTR() is present in the argument list (at most | |

one instance is allowed), then the baseblock pointer is passed for | |

that arg, so that the callee can access the guest state. It is | |

invalid for .nFxState to be zero but IRExpr_BBPTR() to be present, | |

since .nFxState==0 is a claim that the call does not access guest | |

state. | |

IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict. The | |

arguments and 'mFx' are evaluated REGARDLESS of the guard value. | |

The order of argument evaluation is unspecified. The guard | |

expression is evaluated AFTER the arguments and 'mFx' have been | |

evaluated. 'mFx' is expected (by Memcheck) to be a defined value | |

even if the guard evaluates to false. | |

*/ | |

#define VEX_N_FXSTATE 7 /* enough for FXSAVE/FXRSTOR on x86 */ | |

/* Effects on resources (eg. registers, memory locations).  Used by
   IRDirty (below) to declare how a dirty helper touches memory and
   guest state, so that instrumenters can see the effects. */
typedef
   enum {
      Ifx_None=0x1B00,  /* no effect */
      Ifx_Read,         /* reads the resource */
      Ifx_Write,        /* writes the resource */
      Ifx_Modify        /* modifies the resource */
      /* NB: no trailing comma -- a comma after the final enumerator is
         invalid in C90/C++98 and inconsistent with the other enums in
         this header. */
   }
   IREffect;

/* Pretty-print an IREffect */ | |

extern void ppIREffect ( IREffect ); | |

/* Description of a dirty helper call; see the long comment above for
   the full semantics (strictness, guards, BBPTR/VECRET handling). */
typedef
   struct _IRDirty {
      /* What to call, and details of args/results.  .guard must be
         non-NULL.  If .tmp is not IRTemp_INVALID, then the call
         returns a result which is placed in .tmp.  If at runtime the
         guard evaluates to false, .tmp has a 0x555..555 bit pattern
         written to it.  Hence conditional calls that assign .tmp are
         allowed. */
      IRCallee* cee;    /* where to call */
      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
      /* The args vector may contain IRExpr_BBPTR() and/or
         IRExpr_VECRET(), in both cases, at most once. */
      IRExpr**  args;   /* arg vector, ends in NULL. */
      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */

      /* Mem effects; we allow only one R/W/M region to be stated */
      IREffect  mFx;    /* indicates memory effects, if any */
      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
      Int       mSize;  /* of access, or zero if mFx==Ifx_None */

      /* Guest state effects; up to N allowed */
      Int nFxState; /* must be 0 .. VEX_N_FXSTATE */
      struct {
         IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid. */
         UShort   offset;
         UShort   size;
         UChar    nRepeats;
         UChar    repeatLen;
      } fxState[VEX_N_FXSTATE];
      /* The access can be repeated, as specified by nRepeats and
         repeatLen.  To describe only a single access, nRepeats and
         repeatLen should be zero.  Otherwise, repeatLen must be a
         multiple of size and greater than size. */
      /* Overall, the parts of the guest state denoted by (offset,
         size, nRepeats, repeatLen) are

            [offset, +size)
         and, if nRepeats > 0,
            for (i = 1; i <= nRepeats; i++)
               [offset + i * repeatLen, +size)

         A convenient way to enumerate all segments is therefore
            for (i = 0; i < 1 + nRepeats; i++)
               [offset + i * repeatLen, +size)
      */
   }
   IRDirty;

/* Pretty-print a dirty call */ | |

extern void ppIRDirty ( const IRDirty* ); | |

/* Allocate an uninitialised dirty call */ | |

extern IRDirty* emptyIRDirty ( void ); | |

/* Deep-copy a dirty call */ | |

extern IRDirty* deepCopyIRDirty ( const IRDirty* ); | |

/* A handy function which takes some of the tedium out of constructing | |

dirty helper calls. The called function impliedly does not return | |

any value and has a constant-True guard. The call is marked as | |

accessing neither guest state nor memory (hence the "unsafe" | |

designation) -- you can change this marking later if need be. A | |

suitable IRCallee is constructed from the supplied bits. */ | |

extern | |

IRDirty* unsafeIRDirty_0_N ( Int regparms, const HChar* name, void* addr, | |

IRExpr** args ); | |

/* Similarly, make a zero-annotation dirty call which returns a value, | |

and assign that to the given temp. */ | |

extern | |

IRDirty* unsafeIRDirty_1_N ( IRTemp dst, | |

Int regparms, const HChar* name, void* addr, | |

IRExpr** args ); | |

/* --------------- Memory Bus Events --------------- */ | |

typedef
   enum {
      Imbe_Fence=0x1C00,   /* memory fence/barrier */
      /* Needed only on ARM.  It cancels a reservation made by a
         preceding Linked-Load, and needs to be handed through to the
         back end, just as LL and SC themselves are. */
      Imbe_CancelReservation
   }
   IRMBusEvent;

extern void ppIRMBusEvent ( IRMBusEvent ); | |

/* --------------- Compare and Swap --------------- */ | |

/* This denotes an atomic compare and swap operation, either | |

a single-element one or a double-element one. | |

In the single-element case: | |

.addr is the memory address. | |

.end is the endianness with which memory is accessed | |

If .addr contains the same value as .expdLo, then .dataLo is | |

written there, else there is no write. In both cases, the | |

original value at .addr is copied into .oldLo. | |

Types: .expdLo, .dataLo and .oldLo must all have the same type. | |

It may be any integral type, viz: I8, I16, I32 or, for 64-bit | |

guests, I64. | |

.oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must | |

be NULL. | |

In the double-element case: | |

.addr is the memory address. | |

.end is the endianness with which memory is accessed | |

The operation is the same: | |

If .addr contains the same value as .expdHi:.expdLo, then | |

.dataHi:.dataLo is written there, else there is no write. In | |

both cases the original value at .addr is copied into | |

.oldHi:.oldLo. | |

Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must | |

all have the same type, which may be any integral type, viz: I8, | |

I16, I32 or, for 64-bit guests, I64. | |

The double-element case is complicated by the issue of | |

endianness. In all cases, the two elements are understood to be | |

located adjacently in memory, starting at the address .addr. | |

If .end is Iend_LE, then the .xxxLo component is at the lower | |

address and the .xxxHi component is at the higher address, and | |

each component is itself stored little-endianly. | |

If .end is Iend_BE, then the .xxxHi component is at the lower | |

address and the .xxxLo component is at the higher address, and | |

each component is itself stored big-endianly. | |

This allows representing more cases than most architectures can | |

handle. For example, x86 cannot do DCAS on 8- or 16-bit elements. | |

How to know if the CAS succeeded? | |

* if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo), | |

then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now | |

stored at .addr, and the original value there was .oldLo (resp | |

.oldHi:.oldLo). | |

* if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo), | |

then the CAS failed, and the original value at .addr was .oldLo | |

(resp. .oldHi:.oldLo). | |

Hence it is easy to know whether or not the CAS succeeded. | |

*/ | |

/* An atomic compare-and-swap; full semantics are in the long comment
   above.  For a single-element CAS, oldHi must be IRTemp_INVALID and
   expdHi/dataHi must be NULL. */
typedef
   struct {
      IRTemp    oldHi;  /* old value of *addr is written here */
      IRTemp    oldLo;
      IREndness end;    /* endianness of the data in memory */
      IRExpr*   addr;   /* store address */
      IRExpr*   expdHi; /* expected old value at *addr */
      IRExpr*   expdLo;
      IRExpr*   dataHi; /* new value for *addr */
      IRExpr*   dataLo;
   }
   IRCAS;

/* Pretty-print an IRCAS. */
extern void ppIRCAS ( const IRCAS* cas );

/* Allocate and build an IRCAS from its components.  For the
   single-element form, pass IRTemp_INVALID for oldHi and NULL for
   expdHi and dataHi. */
extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
                        IREndness end, IRExpr* addr,
                        IRExpr* expdHi, IRExpr* expdLo,
                        IRExpr* dataHi, IRExpr* dataLo );

/* Deep-copy an IRCAS. */
extern IRCAS* deepCopyIRCAS ( const IRCAS* );

/* ------------------ Circular Array Put ------------------ */ | |

/* Payload of an Ist_PutI statement: write 'data' into the circular
   slice of guest state described by 'descr', at index 'ix' plus
   constant offset 'bias'. */
typedef
   struct {
      IRRegArray* descr; /* Part of guest state treated as circular */
      IRExpr*     ix;    /* Variable part of index into array */
      Int         bias;  /* Constant offset part of index into array */
      IRExpr*     data;  /* The value to write */
   } IRPutI;

/* Pretty-print an IRPutI. */
extern void ppIRPutI ( const IRPutI* puti );

/* Allocate and build an IRPutI from its components. */
extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
                          Int bias, IRExpr* data );

/* Deep-copy an IRPutI. */
extern IRPutI* deepCopyIRPutI ( const IRPutI* );

/* --------------- Guarded loads and stores --------------- */ | |

/* Conditional stores are straightforward. They are the same as | |

normal stores, with an extra 'guard' field :: Ity_I1 that | |

determines whether or not the store actually happens. If not, | |

memory is unmodified. | |

The semantics of this is that 'addr' and 'data' are fully evaluated | |

even in the case where 'guard' evaluates to zero (false). | |

*/ | |

/* Payload of an Ist_StoreG statement: the store happens only if
   'guard' evaluates to true, but 'addr' and 'data' are evaluated
   regardless (see the comment above). */
typedef
   struct {
      IREndness end;   /* Endianness of the store */
      IRExpr*   addr;  /* store address */
      IRExpr*   data;  /* value to write */
      IRExpr*   guard; /* Guarding value, :: Ity_I1 */
   }
   IRStoreG;

/* Conditional loads are a little more complex. 'addr' is the | |

address, 'guard' is the guarding condition. If the load takes | |

place, the loaded value is placed in 'dst'. If it does not take | |

place, 'alt' is copied to 'dst'. However, the loaded value is not | |

placed directly in 'dst' -- it is first subjected to the conversion | |

specified by 'cvt'. | |

For example, imagine doing a conditional 8-bit load, in which the | |

loaded value is zero extended to 32 bits. Hence: | |

* 'dst' and 'alt' must have type I32 | |

* 'cvt' must be a unary op which converts I8 to I32. In this | |

example, it would be ILGop_8Uto32. | |

There is no explicit indication of the type at which the load is | |

done, since that is inferrable from the arg type of 'cvt'. Note | |

that the types of 'alt' and 'dst' and the result type of 'cvt' must | |

all be the same. | |

Semantically, 'addr' is evaluated even in the case where 'guard' | |

evaluates to zero (false), and 'alt' is evaluated even when 'guard' | |

evaluates to one (true). That is, 'addr' and 'alt' are always | |

evaluated. | |

*/ | |

/* Conversions applicable to the value transferred by a guarded load
   (IRLoadG.cvt).  The load is done at the conversion's argument type;
   the converted result is what lands in 'dst' (see comment above). */
typedef
   enum {
      ILGop_INVALID=0x1D00,
      ILGop_Ident32,   /* 32 bit load, no conversion */
      ILGop_16Uto32,   /* 16 bit load, Z-widen to 32 */
      ILGop_16Sto32,   /* 16 bit load, S-widen to 32 */
      ILGop_8Uto32,    /* 8 bit load, Z-widen to 32 */
      ILGop_8Sto32     /* 8 bit load, S-widen to 32 */
   }
   IRLoadGOp;

/* Payload of an Ist_LoadG statement.  If 'guard' is true, the loaded
   value is converted by 'cvt' and assigned to 'dst'; otherwise 'alt'
   is assigned to 'dst'.  'addr' and 'alt' are always evaluated
   (see the comment above). */
typedef
   struct {
      IREndness end;   /* Endianness of the load */
      IRLoadGOp cvt;   /* Conversion to apply to the loaded value */
      IRTemp    dst;   /* Destination (LHS) of assignment */
      IRExpr*   addr;  /* Address being loaded from */
      IRExpr*   alt;   /* Value if load is not done. */
      IRExpr*   guard; /* Guarding value, :: Ity_I1 */
   }
   IRLoadG;

/* Pretty-printers for guarded stores/loads and load conversions. */
extern void ppIRStoreG ( const IRStoreG* sg );
extern void ppIRLoadGOp ( IRLoadGOp cvt );
extern void ppIRLoadG ( const IRLoadG* lg );

/* Allocate and build an IRStoreG from its components. */
extern IRStoreG* mkIRStoreG ( IREndness end,
                              IRExpr* addr, IRExpr* data,
                              IRExpr* guard );

/* Allocate and build an IRLoadG from its components. */
extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt,
                            IRTemp dst, IRExpr* addr, IRExpr* alt,
                            IRExpr* guard );

/* ------------------ Statements ------------------ */ | |

/* The different kinds of statements. Their meaning is explained | |

below in the comments for IRStmt. | |

Those marked META do not represent code, but rather extra | |

information about the code. These statements can be removed | |

without affecting the functional behaviour of the code, however | |

they are required by some IR consumers such as tools that | |

instrument the code. | |

*/ | |

typedef
   enum {
      Ist_NoOp=0x1E00, /* no-op; removable without effect */
      Ist_IMark,   /* META: marks start of one guest instruction */
      Ist_AbiHint, /* META: platform ABI hint */
      Ist_Put,     /* write guest register at fixed offset */
      Ist_PutI,    /* write guest register at non-fixed offset */
      Ist_WrTmp,   /* assign to a temporary (SSA: exactly once) */
      Ist_Store,   /* normal store to memory */
      Ist_LoadG,   /* guarded load */
      Ist_StoreG,  /* guarded store */
      Ist_CAS,     /* atomic compare-and-swap */
      Ist_LLSC,    /* load-linked / store-conditional */
      Ist_Dirty,   /* call a side-effecting ("dirty") C helper */
      Ist_MBE,     /* memory bus event (fence / bus lock) */
      Ist_Exit     /* conditional side exit from the IRSB */
   }
   IRStmtTag;

/* A statement.  Stored as a tagged union.  'tag' indicates what kind | |
 | |
of statement this is.  'Ist' is the union that holds the fields. | |
 | |
If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store | |
 | |
statement, and the fields can be accessed with | |
 | |
'st.Ist.Store.<fieldname>'. | |

For each kind of statement, we show what it looks like when | |

pretty-printed with ppIRStmt(). | |

*/ | |

typedef
   struct _IRStmt {
      IRStmtTag tag;
      union {
         /* A no-op (usually resulting from IR optimisation).  Can be
            omitted without any effect.

            ppIRStmt output: IR-NoOp
         */
         struct {
         } NoOp;

         /* META: instruction mark.  Marks the start of the statements
            that represent a single machine instruction (the end of
            those statements is marked by the next IMark or the end of
            the IRSB).  Contains the address and length of the
            instruction.

            It also contains a delta value.  The delta must be
            subtracted from a guest program counter value before
            attempting to establish, by comparison with the address
            and length values, whether or not that program counter
            value refers to this instruction.  For x86, amd64, ppc32,
            ppc64 and arm, the delta value is zero.  For Thumb
            instructions, the delta value is one.  This is because, on
            Thumb, guest PC values (guest_R15T) are encoded using the
            top 31 bits of the instruction address and a 1 in the lsb;
            hence they appear to be (numerically) 1 past the start of
            the instruction they refer to.  IOW, guest_R15T on ARM
            holds a standard ARM interworking address.

            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
                         eg. ------ IMark(0x4000792, 5, 0) ------,
         */
         struct {
            Addr64 addr;   /* instruction address */
            Int    len;    /* instruction length */
            UChar  delta;  /* addr = program counter as encoded in guest state
                              - delta */
         } IMark;

         /* META: An ABI hint, which says something about this
            platform's ABI.

            At the moment, the only AbiHint is one which indicates
            that a given chunk of address space, [base .. base+len-1],
            has become undefined.  This is used on amd64-linux and
            some ppc variants to pass stack-redzoning hints to whoever
            wants to see them.  It also indicates the address of the
            next (dynamic) instruction that will be executed.  This is
            to help Memcheck with origin tracking.

            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
                         eg. ====== AbiHint(t1, 16, t2) ======
         */
         struct {
            IRExpr* base;  /* Start of undefined chunk */
            Int     len;   /* Length of undefined chunk */
            IRExpr* nia;   /* Address of next (guest) insn */
         } AbiHint;

         /* Write a guest register, at a fixed offset in the guest state.
            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
         */
         struct {
            Int     offset;  /* Offset into the guest state */
            IRExpr* data;    /* The value to write */
         } Put;

         /* Write a guest register, at a non-fixed offset in the guest
            state.  See the comment for GetI expressions for more
            information.

            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
                         eg. PUTI(64:8xF64)[t5,0] = t1
         */
         struct {
            IRPutI* details;
         } PutI;

         /* Assign a value to a temporary.  Note that SSA rules require
            each tmp is only assigned to once.  IR sanity checking will
            reject any block containing a temporary which is not assigned
            to exactly once.

            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
         */
         struct {
            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
            IRExpr* data;  /* Expression (RHS of assignment) */
         } WrTmp;

         /* Write a value to memory.  This is a normal store, not a
            Store-Conditional.  To represent a Store-Conditional,
            instead use IRStmt.LLSC.
            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
         */
         struct {
            IREndness end;   /* Endianness of the store */
            IRExpr*   addr;  /* store address */
            IRExpr*   data;  /* value to write */
         } Store;

         /* Guarded store.  Note that this is defined to evaluate all
            expression fields (addr, data) even if the guard evaluates
            to false.
            ppIRStmt output:
              if (<guard>) ST<end>(<addr>) = <data> */
         struct {
            IRStoreG* details;
         } StoreG;

         /* Guarded load.  Note that this is defined to evaluate all
            expression fields (addr, alt) even if the guard evaluates
            to false.
            ppIRStmt output:
              t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */
         struct {
            IRLoadG* details;
         } LoadG;

         /* Do an atomic compare-and-swap operation.  Semantics are
            described above on a comment at the definition of IRCAS.

            ppIRStmt output:
               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
            eg
               t1 = CASle(t2 :: t3->Add32(t3,1))
               which denotes a 32-bit atomic increment
               of a value at address t2

            A double-element CAS may also be denoted, in which case <tmp>,
            <expected> and <new> are all pairs of items, separated by
            commas.
         */
         struct {
            IRCAS* details;
         } CAS;

         /* Either Load-Linked or Store-Conditional, depending on
            STOREDATA.

            If STOREDATA is NULL then this is a Load-Linked, meaning
            that data is loaded from memory as normal, but a
            'reservation' for the address is also lodged in the
            hardware.

               result = Load-Linked(addr, end)

            The data transfer type is the type of RESULT (I32, I64,
            etc).  ppIRStmt output:

               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)

            If STOREDATA is not NULL then this is a Store-Conditional,
            hence:

               result = Store-Conditional(addr, storedata, end)

            The data transfer type is the type of STOREDATA and RESULT
            has type Ity_I1.  The store may fail or succeed depending
            on the state of a previously lodged reservation on this
            address.  RESULT is written 1 if the store succeeds and 0
            if it fails.  eg ppIRStmt output:

               result = ( ST<end>-Cond(<addr>) = <storedata> )
               eg t3 = ( STbe-Cond(t1, t2) )

            In all cases, the address must be naturally aligned for
            the transfer type -- any misaligned addresses should be
            caught by a dominating IR check and side exit.  This
            alignment restriction exists because on at least some
            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
            misaligned addresses, and we have to actually generate
            stwcx. on the host, and we don't want it trapping on the
            host.

            Summary of rules for transfer type:
              STOREDATA == NULL (LL):
                transfer type = type of RESULT
              STOREDATA != NULL (SC):
                transfer type = type of STOREDATA, and RESULT :: Ity_I1
         */
         struct {
            IREndness end;
            IRTemp    result;    /* :: transfer type for LL, Ity_I1 for SC */
            IRExpr*   addr;
            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
         } LLSC;

         /* Call (possibly conditionally) a C function that has side
            effects (ie. is "dirty").  See the comments above the
            IRDirty type declaration for more information.

            ppIRStmt output:
               t<tmp> = DIRTY <guard> <effects>
                  ::: <callee>(<args>)
            eg.
               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
                     ::: foo{0x380035f4}(t2)
         */
         struct {
            IRDirty* details;
         } Dirty;

         /* A memory bus event - a fence, or acquisition/release of the
            hardware bus lock.  IR optimisation treats all these as fences
            across which no memory references may be moved.
            ppIRStmt output: MBusEvent-Fence,
                             MBusEvent-BusLock, MBusEvent-BusUnlock.
         */
         struct {
            IRMBusEvent event;
         } MBE;

         /* Conditional exit from the middle of an IRSB.
            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
                         eg. if (t69) goto {Boring} 0x4000AAA:I32
            If <guard> is true, the guest state is also updated by
            PUT-ing <dst> at <offsIP>.  This is done because a
            taken exit must update the guest program counter.
         */
         struct {
            IRExpr*    guard;   /* Conditional expression, :: Ity_I1 */
            IRConst*   dst;     /* Jump target (constant only) */
            IRJumpKind jk;      /* Jump kind */
            Int        offsIP;  /* Guest state offset for IP */
         } Exit;
      } Ist;
   }
   IRStmt;

/* Statement constructors.  Each allocates a new IRStmt with the
   corresponding tag and fills in the fields documented in the IRStmt
   union above. */
extern IRStmt* IRStmt_NoOp    ( void );
extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
extern IRStmt* IRStmt_PutI    ( IRPutI* details );
extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_StoreG  ( IREndness end, IRExpr* addr, IRExpr* data,
                                IRExpr* guard );
extern IRStmt* IRStmt_LoadG   ( IREndness end, IRLoadGOp cvt, IRTemp dst,
                                IRExpr* addr, IRExpr* alt, IRExpr* guard );
extern IRStmt* IRStmt_CAS     ( IRCAS* details );
/* storedata: NULL => Load-Linked, non-NULL => Store-Conditional. */
extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
                                IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
/* Note the argument order: jk precedes dst, unlike the field order in
   IRStmt.Ist.Exit. */
extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
                                Int offsIP );

/* Deep-copy an IRStmt. */
extern IRStmt* deepCopyIRStmt ( const IRStmt* );

/* Pretty-print an IRStmt. */
extern void ppIRStmt ( const IRStmt* );

/* ------------------ Basic Blocks ------------------ */ | |

/* Type environments: a bunch of statements, expressions, etc, are | |

incomplete without an environment indicating the type of each | |

IRTemp. So this provides one. IR temporaries are really just | |

unsigned ints and so this provides an array, 0 .. n_types_used-1 of | |

them. | |

*/ | |

/* A type environment: maps each IRTemp (an unsigned int) to its
   IRType, as a simple array indexed 0 .. types_used-1. */
typedef
   struct {
      IRType* types;      /* Array of types, indexed by IRTemp */
      Int     types_size; /* Allocated size of 'types' (capacity) */
      Int     types_used; /* Number of entries in use */
   }
   IRTypeEnv;

/* Obtain a new IRTemp of the given type, registering it in the
   environment. */
extern IRTemp newIRTemp ( IRTypeEnv*, IRType );

/* Deep-copy a type environment */
extern IRTypeEnv* deepCopyIRTypeEnv ( const IRTypeEnv* );

/* Pretty-print a type environment */
extern void ppIRTypeEnv ( const IRTypeEnv* );

/* Code blocks, which in proper compiler terminology are superblocks | |

(single entry, multiple exit code sequences) contain: | |

- A table giving a type for each temp (the "type environment") | |

- An expandable array of statements | |

- An expression of type 32 or 64 bits, depending on the | |

guest's word size, indicating the next destination if the block | |

executes all the way to the end, without a side exit | |

- An indication of any special actions (JumpKind) needed | |

for this final jump. | |

- Offset of the IP field in the guest state. This will be | |

updated before the final jump is done. | |

"IRSB" stands for "IR Super Block". | |

*/ | |

typedef
   struct {
      IRTypeEnv* tyenv;      /* Type of each IRTemp used in 'stmts' */
      IRStmt**   stmts;      /* Expandable array of statements */
      Int        stmts_size; /* Allocated size of 'stmts' (capacity) */
      Int        stmts_used; /* Number of statements in use */
      IRExpr*    next;       /* Fall-through destination; its type is the
                                guest word size (I32 or I64) */
      IRJumpKind jumpkind;   /* Any special action needed for the final
                                jump */
      Int        offsIP;     /* Guest state offset of the IP field,
                                updated before the final jump */
   }
   IRSB;

/* Allocate a new, uninitialised IRSB */
extern IRSB* emptyIRSB ( void );

/* Deep-copy an IRSB */
extern IRSB* deepCopyIRSB ( const IRSB* );

/* Deep-copy an IRSB, except for the statements list, which is set to
   be a new, empty list of statements. */
extern IRSB* deepCopyIRSBExceptStmts ( const IRSB* );

/* Pretty-print an IRSB */
extern void ppIRSB ( const IRSB* );

/* Append an IRStmt to an IRSB */
extern void addStmtToIRSB ( IRSB*, IRStmt* );

/*---------------------------------------------------------------*/ | |

/*--- Helper functions for the IR ---*/ | |

/*---------------------------------------------------------------*/ | |

/* For messing with IR type environments */
extern IRTypeEnv* emptyIRTypeEnv ( void );

/* What is the type of this expression? */
extern IRType typeOfIRConst ( const IRConst* );
extern IRType typeOfIRTemp  ( const IRTypeEnv*, IRTemp );
extern IRType typeOfIRExpr  ( const IRTypeEnv*, const IRExpr* );

/* What are the arg and result type for this IRLoadGOp? */
extern void typeOfIRLoadGOp ( IRLoadGOp cvt,
                              /*OUT*/IRType* t_res,
                              /*OUT*/IRType* t_arg );

/* Sanity check a BB of IR */
extern void sanityCheckIRSB ( const IRSB*  bb,
                              const HChar* caller,
                              Bool require_flatness,
                              IRType guest_word_size );

/* Is this statement in flat form, i.e. suitable for instrumentation? */
extern Bool isFlatIRStmt ( const IRStmt* );

/* Is this any value actually in the enumeration 'IRType' ? */
extern Bool isPlausibleIRType ( IRType ty );

/*---------------------------------------------------------------*/
/*--- IR injection                                            ---*/
/*---------------------------------------------------------------*/

/* NOTE(review): declared without 'extern' (harmless for a function
   declaration at file scope).  Appends injected IR to the given IRSB
   using the given endianness -- see the implementation for the exact
   injection semantics. */
void vex_inject_ir(IRSB *, IREndness);

#endif /* ndef __LIBVEX_IR_H */ | |

/*---------------------------------------------------------------*/ | |

/*--- libvex_ir.h ---*/ | |

/*---------------------------------------------------------------*/ |