blob: 2887b089588b662c136a939d782dda22f2c74153 [file] [log] [blame]
/*---------------------------------------------------------------*/
/*--- begin guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2004-2013 OpenWorks LLP
info@open-works.net
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for amd64 guest code.
Calls to these functions are generated by the back end.
These calls are of course in the host machine code and
this file will be compiled to host machine code, so that
all makes sense.
Only change the signatures of these helper functions very
carefully. If you change the signature here, you'll have to change
the parameters passed to it in the IR calls constructed by
guest-amd64/toIR.c.
The convention used is that all functions called from generated
code are named amd64g_<something>, and any function whose name lacks
that prefix is not called from generated code. Note that some
LibVEX_* functions can however be called by VEX's client, but that
is not the same as calling them from VEX-generated code.
*/
/* Set to 1 to get detailed profiling info about use of the flag
machinery. */
#define PROFILE_RFLAGS 0
/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers. ---*/
/*---------------------------------------------------------------*/
/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
after imulq/mulq. */
/* Compute the full 128-bit product of two signed 64-bit values and
   return it as hi:lo halves.  Standard "mulhs" decomposition into
   32-bit partial products (cf. Hacker's Delight, section 8-2).
   Note: u1*v1 cannot overflow (both operands fit in 33 signed bits),
   and the mixed Long*ULong partials are performed in unsigned
   arithmetic by the usual conversions.  The low half, however, is a
   plain 64x64 multiply whose wraparound we want; doing it on signed
   operands is undefined behaviour on overflow, so it is done in
   unsigned arithmetic and reinterpreted. */
static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
ULong u0, v0, w0;
Long u1, v1, w1, w2, t;
u0 = u & 0xFFFFFFFFULL;      /* low 32 bits of u */
u1 = u >> 32;                /* high 32 bits of u, sign-extended */
v0 = v & 0xFFFFFFFFULL;
v1 = v >> 32;
w0 = u0 * v0;
t = u1 * v0 + (w0 >> 32);
w1 = t & 0xFFFFFFFFULL;
w2 = t >> 32;                /* relies on arithmetic right shift */
w1 = u0 * v1 + w1;
*rHi = u1 * v1 + w2 + (w1 >> 32);
/* Low half: wraparound product, computed without signed-overflow UB. */
*rLo = (Long)((ULong)u * (ULong)v);
}
/* Compute the full 128-bit product of two unsigned 64-bit values,
   returning it as hi:lo halves.  Schoolbook "mulhu": split each
   operand into 32-bit halves and combine the four partial products. */
static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
ULong uLo = u & 0xFFFFFFFFULL;
ULong uHi = u >> 32;
ULong vLo = v & 0xFFFFFFFFULL;
ULong vHi = v >> 32;
/* Low x low partial product. */
ULong p0 = uLo * vLo;
/* Middle terms, accumulated with the carry out of p0. */
ULong mid = uHi * vLo + (p0 >> 32);
ULong midLo = mid & 0xFFFFFFFFULL;
ULong midHi = mid >> 32;
ULong mid2 = uLo * vHi + midLo;
/* High half gathers hi*hi plus both middle-term carries. */
*rHi = uHi * vHi + midHi + (mid2 >> 32);
/* Low half is just the wrapped 64-bit product. */
*rLo = u * v;
}
/* Lookup table for the x86 parity flag: parity_table[b] is
   AMD64G_CC_MASK_P iff byte b has an even number of 1 bits, else 0.
   PF is computed from the least-significant byte of a result only,
   hence 256 entries. */
static const UChar parity_table[256] = {
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
/* generalised left-shifter */
/* Shift x left by n bits if n >= 0, else right by -n bits.  The
   left shift is done on the unsigned reinterpretation to avoid
   signed-overflow UB; the right shift is on a signed value and so
   relies on the compiler providing an arithmetic (sign-propagating)
   right shift, which is implementation-defined in C. */
static inline Long lshift ( Long x, Int n )
{
if (n >= 0)
return (ULong)x << n;
else
return x >> (-n);
}
/* identity on ULong */
/* Used as the "narrowing" function in the 32-bit ACTIONS_UMUL /
   ACTIONS_SMUL instantiations, where the double-width type is
   already 64 bits and no actual narrowing is needed. */
static inline ULong idULong ( ULong x )
{
return x;
}
/* Common preamble for all ACTIONS_* flag-computation macros: binds
   DATA_MASK (mask of the operation's data width), SIGN_MASK (its
   top bit), and local copies of the three thunk fields CC_DEP1,
   CC_DEP2 and CC_NDEP.  The trailing self-assignments exist only to
   suppress unused-variable warnings in instantiations that do not
   use every binding. */
#define PREAMBLE(__data_bits) \
/* const */ ULong DATA_MASK \
= __data_bits==8 \
? 0xFFULL \
: (__data_bits==16 \
? 0xFFFFULL \
: (__data_bits==32 \
? 0xFFFFFFFFULL \
: 0xFFFFFFFFFFFFFFFFULL)); \
/* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
/* const */ ULong CC_DEP1 = cc_dep1_formal; \
/* const */ ULong CC_DEP2 = cc_dep2_formal; \
/* const */ ULong CC_NDEP = cc_ndep_formal; \
/* Four bogus assignments, which hopefully gcc can */ \
/* optimise away, and which stop it complaining about */ \
/* unused variables. */ \
SIGN_MASK = SIGN_MASK; \
DATA_MASK = DATA_MASK; \
CC_DEP2 = CC_DEP2; \
CC_NDEP = CC_NDEP;
/*-------------------------------------------------------------*/
/* Flags after ADD.  DEP1 = first operand, DEP2 = second operand.
   CF: unsigned wraparound (result < an operand at the data width).
   AF: carry out of bit 3.  OF: operands same sign, result differs. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
ULong argL, argR, res; \
argL = CC_DEP1; \
argR = CC_DEP2; \
res = argL + argR; \
cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
12 - DATA_BITS) & AMD64G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after SUB/CMP.  DEP1 = minuend, DEP2 = subtrahend.
   CF: borrow (argL < argR unsigned at the data width).
   OF: operands differ in sign and result sign differs from argL. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
ULong argL, argR, res; \
argL = CC_DEP1; \
argR = CC_DEP2; \
res = argL - argR; \
cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR) & (argL ^ res), \
12 - DATA_BITS) & AMD64G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after ADC.  DEP1 = argL; DEP2 holds argR XORed with the old
   carry (so the true argR is recovered by XORing oldC back out);
   NDEP carries the old flags, from which oldC is extracted.
   CF uses <= when a carry came in, since res == argL is then a
   wraparound case. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
ULong argL, argR, oldC, res; \
oldC = CC_NDEP & AMD64G_CC_MASK_C; \
argL = CC_DEP1; \
argR = CC_DEP2 ^ oldC; \
res = (argL + argR) + oldC; \
if (oldC) \
cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
else \
cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
12 - DATA_BITS) & AMD64G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after SBB.  Mirror image of ACTIONS_ADC: DEP2 is argR XORed
   with the old carry, NDEP holds the old flags.  CF is the borrow,
   using <= when a borrow came in (equal operands then still borrow). */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
ULong argL, argR, oldC, res; \
oldC = CC_NDEP & AMD64G_CC_MASK_C; \
argL = CC_DEP1; \
argR = CC_DEP2 ^ oldC; \
res = (argL - argR) - oldC; \
if (oldC) \
cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
else \
cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = lshift((argL ^ argR) & (argL ^ res), \
12 - DATA_BITS) & AMD64G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after AND/OR/XOR.  DEP1 = result.  CF, OF and AF are
   cleared; PF/ZF/SF are derived from the result. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
cf = 0; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
of = 0; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after INC.  DEP1 = result; NDEP = old flags, because INC
   leaves CF unchanged and the old value must be carried through.
   OF is set exactly when the result equals the most negative value
   (i.e. the increment overflowed from SIGN_MASK-1). */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
ULong argL, argR, res; \
res = CC_DEP1; \
argL = res - 1; \
argR = 1; \
cf = CC_NDEP & AMD64G_CC_MASK_C; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after DEC.  DEP1 = result; NDEP = old flags (CF preserved,
   as for INC).  OF is set exactly when the result is the most
   positive value, i.e. the decrement wrapped past the most negative. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
ULong argL, argR, res; \
res = CC_DEP1; \
argL = res + 1; \
argR = 1; \
cf = CC_NDEP & AMD64G_CC_MASK_C; \
pf = parity_table[(UChar)res]; \
af = (res ^ argL ^ argR) & 0x10; \
zf = ((DATA_UTYPE)res == 0) << 6; \
sf = lshift(res, 8 - DATA_BITS) & 0x80; \
of = ((res & DATA_MASK) \
== ((ULong)SIGN_MASK - 1)) << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after SHL.  DEP1 = final result; DEP2 appears to hold the
   value shifted by one bit less, so that its top bit is the CF
   (last bit shifted out) -- verify against the thunk set up in
   toIR.c.  OF is only architecturally defined for shift count 1. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; /* undefined */ \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
/* of is defined if shift count == 1 */ \
of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
& AMD64G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after SHR/SAR.  DEP1 = final result; DEP2 appears to hold
   the value shifted by one bit less, so its low bit is the CF (last
   bit shifted out) -- verify against toIR.c.  OF only defined for
   shift count 1. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
cf = CC_DEP2 & 1; \
pf = parity_table[(UChar)CC_DEP1]; \
af = 0; /* undefined */ \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
/* of is defined if shift count == 1 */ \
of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
& AMD64G_CC_MASK_O; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
/* Only CF and OF change; all other flag bits are taken unchanged
   from the old flags in NDEP.  The two lshift() terms line the
   result's msb and lsb up on bit 11 (the OF position) before XOR. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong fl \
= (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
| (AMD64G_CC_MASK_C & CC_DEP1) \
| (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
11-(DATA_BITS-1)) \
^ lshift(CC_DEP1, 11))); \
return fl; \
} \
}
/*-------------------------------------------------------------*/
/* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
/* As for ROL, only CF and OF change; the rest come from NDEP.  The
   lshift() terms align the result's top two bits on bit 11 (the OF
   position) before XORing them. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong fl \
= (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
| (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
| (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
11-(DATA_BITS-1)) \
^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
return fl; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after unsigned multiply (8/16/32-bit forms).  The product is
   recomputed at double width from DEP1 and DEP2; CF and OF are set
   iff the high half is nonzero (result does not fit in DATA_BITS).
   NARROWtoU / NARROWto2U are casts to the single- and double-width
   unsigned types. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
DATA_U2TYPE, NARROWto2U) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
DATA_UTYPE hi; \
DATA_UTYPE lo \
= NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
* ((DATA_UTYPE)CC_DEP2) ); \
DATA_U2TYPE rr \
= NARROWto2U( \
((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
* ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
cf = (hi != 0); \
pf = parity_table[(UChar)lo]; \
af = 0; /* undefined */ \
zf = (lo == 0) << 6; \
sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
of = cf << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after signed multiply (8/16/32-bit forms).  CF and OF are
   set iff the high half is not merely the sign extension of the low
   half, i.e. the signed product does not fit in DATA_BITS bits. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
DATA_S2TYPE, NARROWto2S) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
DATA_STYPE hi; \
DATA_STYPE lo \
= NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
* ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
DATA_S2TYPE rr \
= NARROWto2S( \
((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
* ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
pf = parity_table[(UChar)lo]; \
af = 0; /* undefined */ \
zf = (lo == 0) << 6; \
sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
of = cf << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after 64-bit unsigned multiply (mulq).  The 64x64->128
   product is computed via the mullU64 helper, since no wider native
   type is assumed; CF/OF set iff the high 64 bits are nonzero. */
#define ACTIONS_UMULQ \
{ \
PREAMBLE(64); \
{ ULong cf, pf, af, zf, sf, of; \
ULong lo, hi; \
mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
cf = (hi != 0); \
pf = parity_table[(UChar)lo]; \
af = 0; /* undefined */ \
zf = (lo == 0) << 6; \
sf = lshift(lo, 8 - 64) & 0x80; \
of = cf << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after 64-bit signed multiply (imulq).  Uses the mullS64
   helper for the 128-bit product; CF/OF set iff the high half is
   not the sign extension of the low half (product overflowed). */
#define ACTIONS_SMULQ \
{ \
PREAMBLE(64); \
{ ULong cf, pf, af, zf, sf, of; \
Long lo, hi; \
mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
cf = (hi != (lo >>/*s*/ (64-1))); \
pf = parity_table[(UChar)lo]; \
af = 0; /* undefined */ \
zf = (lo == 0) << 6; \
sf = lshift(lo, 8 - 64) & 0x80; \
of = cf << 11; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after BMI1 ANDN.  DEP1 = result.  ZF/SF from the result;
   CF and OF cleared; PF and AF are architecturally undefined and
   are modelled here as 0. */
#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
cf = 0; \
pf = 0; \
af = 0; \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
of = 0; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after BMI1 BLSI (isolate lowest set bit).  DEP1 = result,
   DEP2 = source operand.  CF is set iff the source was nonzero;
   OF cleared; PF/AF modelled as 0 (undefined on hardware). */
#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
cf = ((DATA_UTYPE)CC_DEP2 != 0); \
pf = 0; \
af = 0; \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
of = 0; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after BMI1 BLSMSK (mask up to lowest set bit).  DEP1 =
   result, DEP2 = source.  CF set iff the source was zero; ZF is
   hard-coded 0 since the result x^(x-1) is never zero.
   NOTE(review): locals are declared Long here, unlike the ULong
   used by the sibling macros -- harmless but inconsistent. */
#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ Long cf, pf, af, zf, sf, of; \
cf = ((DATA_UTYPE)CC_DEP2 == 0); \
pf = 0; \
af = 0; \
zf = 0; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
of = 0; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
/* Flags after BMI1 BLSR (reset lowest set bit).  DEP1 = result,
   DEP2 = source.  CF set iff the source was zero; OF cleared;
   PF/AF modelled as 0 (undefined on hardware). */
#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
{ \
PREAMBLE(DATA_BITS); \
{ ULong cf, pf, af, zf, sf, of; \
cf = ((DATA_UTYPE)CC_DEP2 == 0); \
pf = 0; \
af = 0; \
zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
of = 0; \
return cf | pf | af | zf | sf | of; \
} \
}
/*-------------------------------------------------------------*/
#if PROFILE_RFLAGS
/* Profiling state, compiled only when PROFILE_RFLAGS is 1: per-op
   counts of fast vs slow carry-flag evaluation, a per-(op,cond)
   histogram for amd64g_calculate_condition, and total call counts. */
static Bool initted = False;
/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all = 0;
static UInt n_calc_c = 0;
static UInt n_calc_cond = 0;
/* True once every 2^22 total helper calls, to rate-limit output. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
/* Dump the profiling counters: one row per CC_OP with its slow/fast
   carry counts and the 16 per-condition-code counts (printed in
   thousands when >= 1000).  Only compiled under PROFILE_RFLAGS. */
static void showCounts ( void )
{
Int op, co;
HChar ch;
vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
n_calc_all, n_calc_cond, n_calc_c);
vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
" S NS P NP L NL LE NLE\n");
vex_printf(" -----------------------------------------------------"
"----------------------------------------\n");
for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
/* Tag each op with its operand-size suffix; ops past COPY come in
   groups of four (byte, word, long, quad). */
ch = ' ';
if (op > 0 && (op-1) % 4 == 0)
ch = 'B';
if (op > 0 && (op-1) % 4 == 1)
ch = 'W';
if (op > 0 && (op-1) % 4 == 2)
ch = 'L';
if (op > 0 && (op-1) % 4 == 3)
ch = 'Q';
vex_printf("%2d%c: ", op, ch);
vex_printf("%6u ", tabc_slow[op]);
vex_printf("%6u ", tabc_fast[op]);
for (co = 0; co < 16; co++) {
Int n = tab_cond[op][co];
if (n >= 1000) {
vex_printf(" %3dK", n / 1000);
} else
if (n >= 0) {
vex_printf(" %3d ", n );
} else {
vex_printf(" ");
}
}
vex_printf("\n");
}
vex_printf("\n");
}
/* Zero all profiling counters and mark the machinery initialised.
   Called lazily from the helpers on first use. */
static void initCounts ( void )
{
Int op, co;
initted = True;
for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
tabc_fast[op] = tabc_slow[op] = 0;
for (co = 0; co < 16; co++)
tab_cond[op][co] = 0;
}
}
#endif /* PROFILE_RFLAGS */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
/* Dispatches on cc_op to the ACTIONS_* macro for the operation that
   last set the flags; each macro body ends in a `return`, so control
   never falls out of a case arm.  The COPY op means the flags are
   already materialised in dep1 and just need masking. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
ULong cc_dep1_formal,
ULong cc_dep2_formal,
ULong cc_ndep_formal )
{
switch (cc_op) {
case AMD64G_CC_OP_COPY:
/* dep1 holds the flag bits directly; keep only O/S/Z/A/C/P. */
return cc_dep1_formal
& (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
UShort, toUShort );
case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
UInt, toUInt );
case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
ULong, idULong );
case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
Short, toUShort );
case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
Int, toUInt );
case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
Long, idULong );
case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
default:
/* shouldn't really make these calls from generated code */
vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
"( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
}
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
/* Thin public wrapper: optionally bumps the profiling counters,
   then defers entirely to amd64g_calculate_rflags_all_WRK. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
ULong cc_dep1,
ULong cc_dep2,
ULong cc_ndep )
{
# if PROFILE_RFLAGS
if (!initted) initCounts();
n_calc_all++;
if (SHOW_COUNTS_NOW) showCounts();
# endif
return
amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
/* Fast-paths the ops where CF is trivially known (COPY extracts it,
   logical ops always clear it); anything else falls through to the
   full flag computation and masks out CF.  The commented-out cases
   below are further fast paths that have been deliberately kept for
   reference rather than deleted. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
ULong cc_dep1,
ULong cc_dep2,
ULong cc_ndep )
{
# if PROFILE_RFLAGS
if (!initted) initCounts();
n_calc_c++;
tabc_fast[cc_op]++;
if (SHOW_COUNTS_NOW) showCounts();
# endif
/* Fast-case some common ones. */
switch (cc_op) {
case AMD64G_CC_OP_COPY:
return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
case AMD64G_CC_OP_LOGICQ:
case AMD64G_CC_OP_LOGICL:
case AMD64G_CC_OP_LOGICW:
case AMD64G_CC_OP_LOGICB:
/* Logical operations always clear CF. */
return 0;
// case AMD64G_CC_OP_SUBL:
// return ((UInt)cc_dep1) < ((UInt)cc_dep2)
// ? AMD64G_CC_MASK_C : 0;
// case AMD64G_CC_OP_SUBW:
// return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
// ? AMD64G_CC_MASK_C : 0;
// case AMD64G_CC_OP_SUBB:
// return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
// ? AMD64G_CC_MASK_C : 0;
// case AMD64G_CC_OP_INCL:
// case AMD64G_CC_OP_DECL:
// return cc_ndep & AMD64G_CC_MASK_C;
default:
break;
}
# if PROFILE_RFLAGS
/* Took the slow route after all: reattribute this call. */
tabc_fast[cc_op]--;
tabc_slow[cc_op]++;
# endif
return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
& AMD64G_CC_MASK_C;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
/* Evaluate an AMD64 condition code against the flag thunk; returns
   1 if the condition holds, else 0.  Condition codes come in
   inverted/non-inverted pairs (O/NO, B/NB, ...) whose encodings
   differ only in bit 0, so each pair shares one case arm and `inv`
   flips the result.  All flags are computed up front via the worker.
   (Cleanup: the original had unreachable `break;` statements after
   several `return`s; they are removed here.) */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
ULong cc_op,
ULong cc_dep1,
ULong cc_dep2,
ULong cc_ndep )
{
ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
cc_dep2, cc_ndep);
ULong of,sf,zf,cf,pf;
ULong inv = cond & 1;   /* odd codes are the negated conditions */
# if PROFILE_RFLAGS
if (!initted) initCounts();
tab_cond[cc_op][cond]++;
n_calc_cond++;
if (SHOW_COUNTS_NOW) showCounts();
# endif
switch (cond) {
case AMD64CondNO:
case AMD64CondO: /* OF == 1 */
of = rflags >> AMD64G_CC_SHIFT_O;
return 1 & (inv ^ of);
case AMD64CondNZ:
case AMD64CondZ: /* ZF == 1 */
zf = rflags >> AMD64G_CC_SHIFT_Z;
return 1 & (inv ^ zf);
case AMD64CondNB:
case AMD64CondB: /* CF == 1 */
cf = rflags >> AMD64G_CC_SHIFT_C;
return 1 & (inv ^ cf);
case AMD64CondNBE:
case AMD64CondBE: /* (CF or ZF) == 1 */
cf = rflags >> AMD64G_CC_SHIFT_C;
zf = rflags >> AMD64G_CC_SHIFT_Z;
return 1 & (inv ^ (cf | zf));
case AMD64CondNS:
case AMD64CondS: /* SF == 1 */
sf = rflags >> AMD64G_CC_SHIFT_S;
return 1 & (inv ^ sf);
case AMD64CondNP:
case AMD64CondP: /* PF == 1 */
pf = rflags >> AMD64G_CC_SHIFT_P;
return 1 & (inv ^ pf);
case AMD64CondNL:
case AMD64CondL: /* (SF xor OF) == 1 */
sf = rflags >> AMD64G_CC_SHIFT_S;
of = rflags >> AMD64G_CC_SHIFT_O;
return 1 & (inv ^ (sf ^ of));
case AMD64CondNLE:
case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
sf = rflags >> AMD64G_CC_SHIFT_S;
of = rflags >> AMD64G_CC_SHIFT_O;
zf = rflags >> AMD64G_CC_SHIFT_Z;
return 1 & (inv ^ ((sf ^ of) | zf));
default:
/* shouldn't really make these calls from generated code */
vex_printf("amd64g_calculate_condition"
"( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
vpanic("amd64g_calculate_condition");
}
}
/* VISIBLE TO LIBVEX CLIENT */
/* Materialise the guest's %rflags value: evaluate the lazy flag
   thunk for the arithmetic flags, then OR in the separately-stored
   DF, ID and AC bits from their dedicated guest-state fields. */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
{
ULong rflags = amd64g_calculate_rflags_all_WRK(
vex_state->guest_CC_OP,
vex_state->guest_CC_DEP1,
vex_state->guest_CC_DEP2,
vex_state->guest_CC_NDEP
);
/* DFLAG is stored as the string-op stride (+1 or -1), not as a bit. */
Long dflag = vex_state->guest_DFLAG;
vassert(dflag == 1 || dflag == -1);
if (dflag == -1)
rflags |= (1<<10); /* DF occupies bit 10 of %rflags */
if (vex_state->guest_IDFLAG == 1)
rflags |= (1<<21); /* ID occupies bit 21 */
if (vex_state->guest_ACFLAG == 1)
rflags |= (1<<18); /* AC occupies bit 18 */
return rflags;
}
/* VISIBLE TO LIBVEX CLIENT */
/* Set or clear only the guest carry flag, preserving O/S/Z/A/P:
   evaluate the current thunk to concrete flag bits, patch the C
   bit according to bit 0 of new_carry_flag, and write the result
   back as an AMD64G_CC_OP_COPY thunk (flags fully materialised
   in DEP1, DEP2/NDEP zeroed). */
void
LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
/*MOD*/VexGuestAMD64State* vex_state )
{
ULong oszacp = amd64g_calculate_rflags_all_WRK(
vex_state->guest_CC_OP,
vex_state->guest_CC_DEP1,
vex_state->guest_CC_DEP2,
vex_state->guest_CC_NDEP
);
if (new_carry_flag & 1) {
oszacp |= AMD64G_CC_MASK_C;
} else {
oszacp &= ~AMD64G_CC_MASK_C;
}
vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
vex_state->guest_CC_DEP1 = oszacp;
vex_state->guest_CC_DEP2 = 0;
vex_state->guest_CC_NDEP = 0;
}
/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers. ---*/
/*--- These help iropt specialise calls the above run-time ---*/
/*--- %rflags functions. ---*/
/*---------------------------------------------------------------*/
/* Used by the optimiser to try specialisations. Returns an
equivalent expression, or NULL if none. */
static Bool isU64 ( IRExpr* e, ULong n )
{
   /* True iff 'e' is exactly the 64-bit integer constant 'n'. */
   if (e->tag != Iex_Const)
      return False;
   if (e->Iex.Const.con->tag != Ico_U64)
      return False;
   return toBool(e->Iex.Const.con->Ico.U64 == n);
}
IRExpr* guest_amd64_spechelper ( const HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int n_precedingStmts )
{
   /* Translation-time specialiser: given a call to one of the rflags
      helper functions with (partially) constant arguments, return an
      equivalent, cheaper IR expression, or NULL if no specialisation
      applies. */
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }

      /*---------------- ADDL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
         /* This is very commonly generated by Javascript JITs, for
            the idiom "do a 32-bit add and jump to out-of-line code if
            an overflow occurs". */
         /* long add, then O (overflow)
            --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
            --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
            --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        binop(Iop_And64,
                              unop(Iop_Not64,
                                   binop(Iop_Xor64, cc_dep1, cc_dep2)),
                              binop(Iop_Xor64,
                                    cc_dep1,
                                    binop(Iop_Add64, cc_dep1, cc_dep2))),
                        mkU8(31)),
                  mkU64(1));
      }

      /*---------------- SUBQ ----------------*/

      /* 0, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
         /* long long sub/cmp, then O (overflow)
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         /* Shift by 63 (not 64): Iop_Shr64 shift amounts must be in
            the range 0..63, and we want bit 63 of the 64-bit value. */
         return binop(Iop_Shr64,
                      binop(Iop_And64,
                            binop(Iop_Xor64, cc_dep1, cc_dep2),
                            binop(Iop_Xor64,
                                  cc_dep1,
                                  binop(Iop_Sub64, cc_dep1, cc_dep2))),
                      mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
         /* No action.  Never yet found a test case. */
      }

      /* 2, 3 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      /* 6, 7 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
         /* long long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor64,
                      unop(Iop_1Uto64,
                           binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
                      mkU64(1));
      }

      /* 8, 9 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
         /* long long sub/cmp, then S (negative)
            --> (dst-src)[63]
            --> (dst-src) >>u 63 */
         return binop(Iop_Shr64,
                      binop(Iop_Sub64, cc_dep1, cc_dep2),
                      mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
         /* long long sub/cmp, then NS (not negative)
            --> (dst-src)[63] ^ 1
            --> ((dst-src) >>u 63) ^ 1 */
         return binop(Iop_Xor64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, cc_dep2),
                            mkU8(63)),
                      mkU64(1));
      }

      /* 12, 13 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
         /* long long sub/cmp, then NL (signed greater than or equal)
            --> test dst >=s src
            --> test src <=s dst */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
      }

      /* 14, 15 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
         /* long long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
      }

      /*---------------- SUBL ----------------*/

      /* 0, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
         /* This is very commonly generated by Javascript JITs, for
            the idiom "do a 32-bit subtract and jump to out-of-line
            code if an overflow occurs". */
         /* long sub/cmp, then O (overflow)
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
            --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        binop(Iop_And64,
                              binop(Iop_Xor64, cc_dep1, cc_dep2),
                              binop(Iop_Xor64,
                                    cc_dep1,
                                    binop(Iop_Sub64, cc_dep1, cc_dep2))),
                        mkU8(31)),
                  mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
         /* No action.  Never yet found a test case. */
      }

      /* 2, 3 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }

      /* 6, 7 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /* 8, 9 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative)
            --> (dst-src)[31]
            --> ((dst -64 src) >>u 31) & 1
            Pointless to narrow the args to 32 bit before the subtract. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, cc_dep2),
                            mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
         /* long sub/cmp, then NS (not negative)
            --> (dst-src)[31] ^ 1
            --> (((dst -64 src) >>u 31) & 1) ^ 1
            Pointless to narrow the args to 32 bit before the subtract. */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,
                                  binop(Iop_Sub64, cc_dep1, cc_dep2),
                                  mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }

      /* 12, 13 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test dst >=s src
            --> test src <=s dst */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /* 14, 15 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      /*---------------- SUBW ----------------*/

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      /* 6, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
         /* word sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_Shl64, cc_dep1, mkU8(48)),
                           binop(Iop_Shl64, cc_dep2, mkU8(48))));
      }

      /* 14, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
      }

      /*---------------- SUBB ----------------*/

      /* 2, 3 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
         /* byte sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
         /* byte sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep2, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep1, mkU64(0xFF))));
      }

      /* 4, 5 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      /* 6, */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
         /* byte sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }

      /* 8, 9 */
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
                                          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
         /* long long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }

      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
         /* long and/or/xor, then S --> (ULong)result[31] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
         /* long and/or/xor, then NS --> (ULong) ~ result[31] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64, cc_dep1, mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
                           mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
         /* word and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
                           mkU64(0)));
      }

      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
                                        mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
         /* byte and/or/xor, then NS --> (UInt)!result[7] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- INCB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
         /* 8-bit inc, then LE --> sign bit of the arg */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, mkU64(1)),
                            mkU8(7)),
                      mkU64(1));
      }

      /*---------------- INCW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
         /* 16-bit inc, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- DECL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
         /* 16-bit dec, then NZ --> test dst != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of amd64 FP compares: "comisd ... ;
         jbe" for example. */

      if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
          (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test (C
            or Z == 1). */
         /* COPY, then NBE --> extract C and Z from dep1, and test (C
            or Z == 0). */
         ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(
                        Iop_Or64,
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
                     ),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
         /* COPY, then B --> extract C dep1, and test (C == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpEQ64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
                     mkU64(1)
                  ),
                  mkU64(nnn)
               )
            );
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         return
            unop(
               Iop_1Uto64,
               binop(
                  Iop_CmpNE64,
                  binop(
                     Iop_And64,
                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
                     mkU64(1)
                  ),
                  mkU64(0)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "amd64g_calculate_rflags_c" --------- */

   if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
      /* specialise calls to above "calculate_rflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           cc_dep1,
                           cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64,cc_dep1,mkU64(0xFF)),
                           binop(Iop_And64,cc_dep2,mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
          || isU64(cc_op, AMD64G_CC_OP_LOGICL)
          || isU64(cc_op, AMD64G_CC_OP_LOGICW)
          || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU64(0);
      }
      if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
          || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU32
#  undef mkU8

   return NULL;
}
/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities. ---*/
/*---------------------------------------------------------------*/
static inline Bool host_is_little_endian ( void )
{
   /* Probe the lowest-addressed byte of a known 32-bit pattern; on a
      little-endian host it holds the least significant byte, 0x10. */
   UInt  probe = 0x76543210;
   UChar first = *(UChar*)(&probe);
   return toBool(first == 0x10);
}
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Classify an x87 register value exactly as the FXAM insn does,
   returning the C3/C2/C1/C0 condition-code bits.  'tag' is the vex
   1-bit tag for the register (0 == empty), 'dbl' is the register's
   value as an IEEE754 double.  The C3..C0 encodings below follow the
   (C3,C2,C1,C0) comments on each return. */
ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );   /* C1 reports the sign bit */

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
                               | AMD64G_FC_MASK_C0;
   }

   /* Biased exponent: bits [62:52] of the double, assembled from the
      top two bytes. */
   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   /* Mantissa is the remaining 52 bits: low nibble of byte 6 plus
      bytes 5..0. */
   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return AMD64G_FC_MASK_C3 | 0
                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
                                   | AMD64G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
}
/* This is used to implement both 'frstor' and 'fldenv'. The latter
appears to differ from the former only in that the 8 FP registers
themselves are not transferred into the guest state. */
/* Load an x87 FPU image (28-byte env + registers) from 'x87_state'
   into the guest state.  If 'moveRegs' is False only the environment
   (FTOP, status, tags, control word) is transferred, which is what
   'fldenv' needs; with True the 8 registers move too ('frstor').
   Returns any emulation warning arising from the control word. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestAMD64State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   /* FTOP lives in bits [13:11] of the x87 status word. */
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87->env[FP_ENV_TAG];
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   /* C3..C0 condition bits, mask 0x4700 in the status word. */
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  The image stores registers in ST
      order; the guest state stores them by physical register, hence
      the (stno + ftop) & 7 mapping.  x87 tag 3 means empty; vex tags
      are 1 bit (0 = empty, 1 = full). */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = (UInt)pair & 0xFFFFFFFFULL;   /* low half: rounding mode */
   ew      = (VexEmNote)(pair >> 32);      /* high half: warning */

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
/* Create an x87 FPU state from the guest state, as close as
we can approximate it. */
/* Build a 108-byte x87 FPU image (env + 8 registers in ST order)
   at 'x87_state' from the guest state, as close as the vex
   representation allows. */
static
void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
                  /*OUT*/UChar* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   /* Zero the whole env, then set the fields a real fnstenv image
      carries as all-ones padding words. */
   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   /* Status word: FTOP in bits [13:11], C3..C0 in mask 0x4700. */
   x87->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FP_ENV_CTRL]
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty -- x87 tag 3.  The (zeroed) register is
            still converted and dumped, so the whole image is written
            and stays fully defined for memcheck. */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      } else {
         /* register is full -- x87 tag 0 ("valid"). */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* NOTE: only handles 32-bit format (no REX.W on the insn) */
/* Write the first 160 bytes of an fxsave image (x87 state + MXCSR) at
   guest address 'addr'; the %xmm0..%xmm15 part of the image is left
   to the generated IR (see comment at the end). */
void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
                                                HWord addr )
{
   /* Derived from values obtained from
      vendor_id       : AuthenticAMD
      cpu family      : 15
      model           : 12
      model name      : AMD Athlon(tm) 64 Processor 3200+
      stepping        : 0
      cpu MHz         : 2200.000
      cache size      : 512 KB
   */
   /* Somewhat roundabout, but at least it's simple.  First build a
      full fnsave-style image in 'tmp', then reformat it into the
      fxsave layout. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   /* fxsave's FTW is a 1-bit-per-register summary: bit set iff the
      register's 2-bit x87 tag is not 3 (empty). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   /* FOP: faulting fpu opcode.  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[3]  = 0; /* BOGUS */

   /* RIP (Last x87 instruction pointer).  From experimentation, the
      real CPU does not write this field. (?!) */
   addrS[4]  = 0; /* BOGUS */
   addrS[5]  = 0; /* BOGUS */
   addrS[6]  = 0; /* BOGUS */
   addrS[7]  = 0; /* BOGUS */

   /* RDP (Last x87 data pointer).  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[8]  = 0; /* BOGUS */
   addrS[9]  = 0; /* BOGUS */
   addrS[10] = 0; /* BOGUS */
   addrS[11] = 0; /* BOGUS */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   addrS[15] = 0x0000; /* MXCSR mask (hi16) */

   /* Copy in the FP registers, in ST order.  Each 80-bit register
      occupies a 16-byte slot; the upper 6 bytes are zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm15 remain to be copied, and we let the generated IR do
      that, so as to make Memcheck's definedness flow for the non-XMM
      parts independant from that of the all the other control and
      status words in the structure.  This avoids the false positives
      shown in #291310. */
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
/* Restore x87 state and MXCSR from an fxsave image at guest address
   'addr', leaving %xmm0..%xmm15 to the generated IR.  Returns an
   emulation warning, preferring an x87 one over an XMM one. */
VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
                                                      HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   UShort    fp_tags;
   Int       r, stno, i;

   /* Don't restore %xmm0 .. %xmm15, for the same reasons that
      amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them.  See
      comment in that function for details. */

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */
   for (i = 0; i < 14; i++) tmp.env[i] = 0;
   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7]: each 80-bit register sits in a 16-byte
      image slot, of which only the first 10 bytes are meaningful. */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */

   /* Reconstruct the 2-bit-per-register x87 tag word from the fxsave
      1-bit FTW summary byte: summary bit set means not-empty (see the
      matching FXSAVE helper above). */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough,
                                     since special/zero also map here. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   /* MXCSR occupies image words 12 (lo) and 13 (hi). */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
{
   /* Reset the x87 state exactly as 'finit' does: empty stack at
      slot 0, all registers zeroed, round-to-nearest, and all C3..C0
      status bits clear. */
   Int r;
   gst->guest_FTOP = 0;
   for (r = 0; r < 8; r++) {
      gst->guest_FPTAG[r] = 0;   /* tag: empty */
      gst->guest_FPREG[r] = 0;   /* value: IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
/* Read a little-endian 80-bit x87 extended value from guest address
   'addrU' and return it converted to an IEEE754 64-bit double. */
ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
/* Convert the IEEE754 64-bit double 'f64' to little-endian 80-bit
   x87 extended format and write it at guest address 'addrU'. */
void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}
/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* mxcsr[15:0] contains a SSE native format MXCSR value.
Extract from it the required SSEROUND value and any resulting
emulation warning, and return (warn << 32) | sseround value.
*/
ULong amd64g_check_ldmxcsr ( ULong mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings, in decreasing order of
      priority: unmasked exceptions, then FZ, then DAZ. */
   VexEmNote ew;
   if ((mxcsr & 0x1F80) != 0x1F80) {
      ew = EmWarn_X86_sseExns;   /* some SSE exceptions unmasked */
   } else if (mxcsr & (1<<15)) {
      ew = EmWarn_X86_fz;        /* flush-to-zero enabled */
   } else if (mxcsr & (1<<6)) {
      ew = EmWarn_X86_daz;       /* denormals-are-zero enabled */
   } else {
      ew = EmNote_NONE;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
native format MXCSR value. */
ULong amd64g_create_mxcsr ( ULong sseround )
{
   /* All exceptions masked (0x1F80), rounding mode in bits 14:13. */
   return 0x1F80 | ((sseround & 3) << 13);
}
/* CLEAN HELPER */
/* fpucw[15:0] contains a x87 native format FPU control word.
Extract from it the required FPROUND value and any resulting
emulation warning, and return (warn << 32) | fpround value.
*/
ULong amd64g_check_fldcw ( ULong fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings: unmasked exceptions
      first, then a non-default precision-control setting. */
   VexEmNote ew;
   if ((fpucw & 0x3F) != 0x3F) {
      ew = EmWarn_X86_x87exns;        /* some x87 exceptions unmasked */
   } else if (((fpucw >> 8) & 3) != 3) {
      ew = EmWarn_X86_x87precision;   /* precision other than 64-bit */
   } else {
      ew = EmNote_NONE;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
native format FPU control word. */
ULong amd64g_create_fpucw ( ULong fpround )
{
   /* Default control word 0x037F (all exceptions masked, 64-bit
      precision), with the rounding mode in bits 11:10. */
   return 0x037F | ((fpround & 3) << 10);
}
/* This is used to implement 'fldenv'.
Reads 28 bytes at x87_state[0 .. 27]. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
/* Implements 'fldenv': load only the 28-byte x87 environment (not the
   registers) from x87_state into the guest state.  Returns any
   emulation warning from the control word. */
VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
                                      /*IN*/HWord x87_state)
{
   return do_put_x87( False, (UChar*)x87_state, vex_state );
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
/* Create an x87 FPU env from the guest state, as close as we can
   approximate it.  Writes 28 bytes at x87_state[0..27]. */
void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
                                 /*OUT*/HWord x87_state )
{
   Int        i, stno;
   UInt       tagw;
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   ULong      c3210   = vex_state->guest_FC3210;

   /* Zero the whole 28-byte env, then force the selector/padding
      words to all-ones. */
   for (i = 0; i < 14; i++)
      x87->env[i] = 0;
   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;

   /* Status word: FTOP in bits 13:11, C3..C0 from guest_FC3210. */
   x87->env[FP_ENV_STAT]
      = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
   /* Control word reconstructed from the guest rounding mode. */
   x87->env[FP_ENV_CTRL]
      = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));

   /* Tag word: two bits per physical register -- 3 for an empty
      register, 0 for a full one. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      Int preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
      }
      /* else: register is full, tag bits stay 00. */
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);

   /* We don't dump the x87 registers, tho. */
}
/* This is used to implement 'fnsave'.
   Writes 108 bytes at x87_state[0 .. 107]. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
                                 /*OUT*/HWord x87_state)
{
   /* Delegate to the generic x87 dumper, which writes the full
      108-byte env + register-stack image. */
   UChar* x87_img = (UChar*)x87_state;
   do_get_x87( vex_state, x87_img );
}
/* This is used to implement 'fnsaves'.
   Writes 94 bytes at x87_state[0 .. 93]. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
                                  /*OUT*/HWord x87_state)
{
   Int           i, stno;
   UInt          tagw;
   ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
   UInt          ftop    = vex_state->guest_FTOP;
   UInt          c3210   = vex_state->guest_FC3210;

   /* Build the 14-byte (16-bit protected mode) env image. */
   for (i = 0; i < 7; i++)
      x87->env[i] = 0;
   /* Status word: FTOP in bits 13:11, C3..C0 from guest_FC3210. */
   x87->env[FPS_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   /* Control word reconstructed from the guest rounding mode. */
   x87->env[FPS_ENV_CTRL]
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order.  The tag word gets 3 for
      an empty register and 0 for a full one; the register image is
      written out either way. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      Int preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
      }
      convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                              &x87->reg[10*stno] );
   }
   x87->env[FPS_ENV_TAG] = toUShort(tagw);
}
/* This is used to implement 'frstor'.
   Reads 108 bytes at x87_state[0 .. 107]. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
                                      /*IN*/HWord x87_state)
{
   /* 'True' => load the register stack as well as the environment.
      Any emulation warning comes from do_put_x87. */
   UChar* x87_img = (UChar*)x87_state;
   return do_put_x87( True, x87_img, vex_state );
}
/* This is used to implement 'frstors'.
   Reads 94 bytes at x87_state[0 .. 93]. */
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER */
VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
                                       /*IN*/HWord x87_state)
{
   Int           stno;
   ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
   UInt          ftop    = (x87->env[FPS_ENV_STAT] >> 11) & 7;
   UInt          tagw    = x87->env[FPS_ENV_TAG];
   UInt          fpucw   = x87->env[FPS_ENV_CTRL];
   UInt          c3210   = x87->env[FPS_ENV_STAT] & 0x4700;
   ULong         pair;
   VexEmNote     ew;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      Int  preg = (stno + ftop) & 7;
      UInt tag  = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         convert_f80le_to_f64le( &x87->reg[10*stno],
                                 (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;
   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* Handle the control word: amd64g_check_fldcw returns
      (warning << 32) | fpround. */
   pair = amd64g_check_fldcw ( (ULong)fpucw );
   vex_state->guest_FPROUND = ((UInt)pair) & 3;
   ew = (VexEmNote)(pair >> 32);

   /* emulation warnings --> caller */
   return ew;
}
/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID. ---*/
/*---------------------------------------------------------------*/
/* Claim to be the following CPU, which is probably representative of
the lowliest (earliest) amd64 offerings. It can do neither sse3
nor cx16.
vendor_id : AuthenticAMD
cpu family : 15
model : 5
model name : AMD Opteron (tm) Processor 848
stepping : 10
cpu MHz : 1797.682
cache size : 1024 KB
fpu : yes
fpu_exception : yes
cpuid level : 1
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep
mtrr pge mca cmov pat pse36 clflush mmx fxsr
sse sse2 syscall nx mmxext lm 3dnowext 3dnow
bogomips : 3600.62
TLB size : 1088 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 40 bits physical, 48 bits virtual
power management: ts fid vid ttp
2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
we don't support them. See #291568. 3dnow is 80000001.EDX.31
and 3dnowext is 80000001.EDX.30.
*/
/* Emulate CPUID for the baseline CPU described in the comment above
   (AMD Opteron 848: no sse3, no cx16).  The leaf is selected by the
   low 32 bits of guest RAX; results are written to guest
   RAX/RBX/RCX/RDX. */
void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
{
   /* Write the four CPUID result registers into the guest state. */
#  define SET_ABCD(_a,_b,_c,_d)               \
      do { st->guest_RAX = (ULong)(_a);       \
           st->guest_RBX = (ULong)(_b);       \
           st->guest_RCX = (ULong)(_c);       \
           st->guest_RDX = (ULong)(_d);       \
      } while (0)
   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         /* Max basic leaf (1) + vendor string "AuthenticAMD"
            (EBX:EDX:ECX). */
         SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x00000001:
         /* Family/model/stepping (0xf5a) and basic feature flags;
            ECX==0 => no sse3/cx16. */
         SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
         break;
      case 0x80000000:
         /* Max extended leaf (0x80000018) + vendor string again. */
         SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x80000001:
         /* Don't claim to support 3dnow or 3dnowext.  0xe1d3fbff is
            the original it-is-supported value that the h/w provides.
            See #291568. */
         SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
                                                      0x21d3fbff);
         break;
      case 0x80000002:
         /* Leaves 0x80000002..4: processor brand string
            "AMD Opteron(tm) Processor 848". */
         SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
         break;
      case 0x80000003:
         SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
         break;
      case 0x80000004:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000005:
         /* L1 cache and TLB info. */
         SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
         break;
      case 0x80000006:
         /* L2 cache and TLB info. */
         SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
         break;
      case 0x80000007:
         /* Advanced power management. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
         break;
      case 0x80000008:
         /* Physical/virtual address sizes (40/48 bits). */
         SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         /* Unknown leaf: all zeroes. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
   }
#  undef SET_ABCD
}
/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
capable.
vendor_id : GenuineIntel
cpu family : 6
model : 15
model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
stepping : 6
cpu MHz : 2394.000
cache size : 4096 KB
physical id : 0
siblings : 2
core id : 0
cpu cores : 2
fpu : yes
fpu_exception : yes
cpuid level : 10
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep
mtrr pge mca cmov pat pse36 clflush dts acpi
mmx fxsr sse sse2 ss ht tm syscall nx lm
constant_tsc pni monitor ds_cpl vmx est tm2
cx16 xtpr lahf_lm
bogomips : 4798.78
clflush size : 64
cache_alignment : 64
address sizes : 36 bits physical, 48 bits virtual
power management:
*/
/* Emulate CPUID for the Core 2 class CPU described in the comment
   above (sse3 and cx16 capable).  The leaf is selected by the low
   32 bits of guest RAX (and, for leaf 4, the sub-leaf by guest RCX);
   results are written to guest RAX/RBX/RCX/RDX. */
void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
{
   /* Write the four CPUID result registers into the guest state. */
#  define SET_ABCD(_a,_b,_c,_d)               \
      do { st->guest_RAX = (ULong)(_a);       \
           st->guest_RBX = (ULong)(_b);       \
           st->guest_RCX = (ULong)(_c);       \
           st->guest_RDX = (ULong)(_d);       \
      } while (0)
   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         /* Max basic leaf (0xa) + vendor string "GenuineIntel"
            (EBX:EDX:ECX). */
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         /* Family/model/stepping (0x6f6) and feature flags; ECX
            includes sse3 (bit 0) and cx16 (bit 13). */
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         /* Cache/TLB descriptors. */
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         /* Processor serial number (unsupported). */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         /* Deterministic cache parameters; sub-leaf selected by
            ECX (0 = L1D, 1 = L1I, 2 = L2). */
         switch (0xFFFFFFFF & st->guest_RCX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         /* MONITOR/MWAIT parameters. */
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         /* Thermal and power management. */
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         /* Structured extended features (none). */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         /* Direct cache access (unsupported). */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
         /* Architectural performance monitoring.  NB: the 'default'
            case below jumps here, so unknown leaves get this value
            too (Intel semantics: out-of-range leaves return the
            highest basic leaf's data). */
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         /* Max extended leaf (0x80000008); no vendor string in the
            extended range on Intel. */
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         /* Extended features: lahf_lm (ECX bit 0); syscall/nx/lm
            in EDX. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
         break;
      case 0x80000002:
         /* Leaves 0x80000002..4: processor brand string
            "Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz". */
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         /* L2 cache info. */
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         /* Physical/virtual address sizes (36/48 bits). */
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}
/* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
capable.
vendor_id : GenuineIntel
cpu family : 6
model : 37
model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
stepping : 2
cpu MHz : 3334.000
cache size : 4096 KB
physical id : 0
siblings : 4
core id : 0
cpu cores : 2
apicid : 0