| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @author Alexander V. Astapchuk |
| */ |
| |
| /** |
| * @file |
| * @brief Main encoding routines and structures. |
| */ |
| |
| #ifndef __ENC_BASE_H_INCLUDED__ |
| #define __ENC_BASE_H_INCLUDED__ |
| |
| #include "enc_defs.h" |
| |
| |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <memory.h> |
| |
| ENCODER_NAMESPACE_START |
| struct MnemonicInfo; |
| struct OpcodeInfo; |
| struct Rex; |
| |
| /** |
| * @brief Basic facilities for generation of processor's instructions. |
| * |
| * The class EncoderBase represents the basic facilities for the encoding of |
| * processor's instructions on IA32 and EM64T platforms. |
| * |
| * The class provides general interface to generate the instructions as well |
| * as to retrieve some static data about instructions (number of arguments, |
| * their roles, etc). |
| * |
| * Currently, the EncoderBase class is used for both LIL and Jitrino code |
| * generators. Each of these code generators has its own wrapper to adapt |
| * this general interface for specific needs - see encoder.h for LIL wrappers |
| * and Ia32Encoder.h for Jitrino's adapter. |
| * |
| * Interface is provided through static methods, no instances of EncoderBase |
| * to be created. |
| * |
| * @todo RIP-based addressing on EM64T - it's not yet supported currently. |
| */ |
| class EncoderBase { |
| public: |
| class Operands; |
| struct MnemonicDesc; |
| /** |
| * @brief Generates processor's instruction. |
| * |
| * @param stream - a buffer to generate into |
| * @param mn - \link Mnemonic mnemonic \endlink of the instruction |
| * @param opnds - operands for the instruction |
| * @returns (stream + length of the just generated instruction) |
| */ |
| static char * encode(char * stream, Mnemonic mn, const Operands& opnds); |
| static char * getOpndLocation(int index); |
| |
| /** |
| * @brief Generates the smallest possible number of NOP-s. |
| * |
| * Effectively generates the smallest possible number of instructions, |
| * which are NOP-s for CPU. Normally used to make a code alignment. |
| * |
| * The method inserts exactly the number of bytes specified. It is the caller's |
| * responsibility to make sure the buffer is big enough. |
| * |
| * @param stream - buffer where to generate code into, can not be NULL |
| * @param howMany - how many bytes to fill with NOP-s |
| * @return \c (stream+howMany) |
| */ |
| static char * nops(char * stream, unsigned howMany); |
| |
| /** |
| * @brief Inserts a prefix into the code buffer. |
| * |
| * The method writes no more than one byte into the buffer. It is the |
| * caller's responsibility to make sure the buffer is big enough. |
| * |
| * @param stream - buffer where to insert the prefix |
| * @param pref - prefix to be inserted. If it's InstPrefix_Null, then |
| * no action performed and return value is \c stream. |
| * @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream |
| * otherwise |
| */ |
| static char * prefix(char* stream, InstPrefix pref); |
| |
| /** |
| * @brief Determines if operand with opndExt suits the position with instExt. |
| */ |
| static bool extAllowed(OpndExt opndExt, OpndExt instExt); |
| |
| /** |
| * @brief Returns #MnemonicDesc by the given Mnemonic. |
| */ |
| static const MnemonicDesc * getMnemonicDesc(Mnemonic mn) |
| { |
| assert(mn < Mnemonic_Count); |
| return mnemonics + mn; |
| } |
| |
| /** |
| * @brief Returns a Mnemonic for the given name. |
| * |
| * The lookup is case insensitive, if no mnemonic found for the given |
| * string, then Mnemonic_Null returned. |
| */ |
| static Mnemonic str2mnemonic(const char * mn_name); |
| |
| /** |
| * @brief Returns a string representation of the given Mnemonic. |
| * |
| * If invalid mnemonic passed, then the behavior is unpredictable. |
| */ |
| static const char * getMnemonicString(Mnemonic mn) |
| { |
| return getMnemonicDesc(mn)->name; |
| } |
| |
| static const char * toStr(Mnemonic mn) |
| { |
| return getMnemonicDesc(mn)->name; |
| } |
| |
| |
| /** |
| * @brief Description of operand. |
| * |
| * Description of an operand in opcode - its kind, size or RegName if |
| * operand must be a particular register. |
| */ |
| struct OpndDesc { |
| /** |
| * @brief Location of the operand. |
| * |
| * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem. |
| */ |
| OpndKind kind; |
| /** |
| * @brief Size of the operand. |
| */ |
| OpndSize size; |
| /** |
| * @brief Extention of the operand. |
| */ |
| OpndExt ext; |
| /** |
| * @brief Appropriate RegName if operand must reside on a particular |
| * register (i.e. CWD/CDQ instructions), RegName_Null |
| * otherwise. |
| */ |
| RegName reg; |
| }; |
| |
/**
 * @brief Description of operands' roles in instruction.
 */
struct OpndRolesDesc {
    /**
     * @brief Total number of operands in the operation.
     */
    unsigned count;
    /**
     * @brief Number of defs in the operation.
     */
    unsigned defCount;
    /**
     * @brief Number of uses in the operation.
     */
    unsigned useCount;
    /**
     * @brief Operand roles, bit-packed.
     *
     * A bit-packed info about operands' roles. Each operand's role takes
     * two bits and is built by ORing #OpndRole_Def and #OpndRole_Use
     * appropriately.
     *
     * The layout is the one decoded by getOpndRoles(): the role of
     * operand \c idx is stored at bit offset <code>(count-1-idx)*2</code>.
     * That is, the LAST operand occupies the least significant bits (0,1)
     * and operand#0 occupies the most significant pair. For an operation
     * with 3 operands:
     *  - operand#0's role is shifted left by 4, e.g. 'OpndRole_Def<<4'
     *  - operand#1's role is shifted left by 2, e.g. 'OpndRole_Use<<2'
     *  - operand#2's role is not shifted, e.g. 'OpndRole_Use'.
     */
    unsigned roles;
};
| |
| /** |
| * @brief Extracts appropriate OpndRole for a given operand. |
| * |
| * The order of operands is left-to-right, i.e. for MOV, it |
| * would be 'MOV op0, op1' |
| */ |
| static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx) |
| { |
| assert(idx < ord.count); |
| return (OpndRole)(ord.roles>>((ord.count-1-idx)*2) & 0x3); |
| } |
| |
| /** |
| * @brief Info about single opcode - its opcode bytes, operands, |
| * operands' roles. |
| */ |
| union OpcodeDesc { |
| char dummy[128]; // To make total size a power of 2 |
| |
| struct { |
| /** |
| * @brief Raw opcode bytes. |
| * |
| * 'Raw' opcode bytes which do not require any analysis and are |
| * independent from arguments/sizes/etc (may include opcode size |
| * prefix). |
| */ |
| char opcode[5]; |
| unsigned opcode_len; |
| unsigned aux0; |
| unsigned aux1; |
| /** |
| * @brief Info about opcode's operands. |
| * |
| * The [3] mostly comes from IDIV/IMUL which both may have up to 3 |
| * operands. |
| */ |
| OpndDesc opnds[3]; |
| unsigned first_opnd; |
| /** |
| * @brief Info about operands - total number, number of uses/defs, |
| * operands' roles. |
| */ |
| OpndRolesDesc roles; |
| /** |
| * @brief If not zero, then this is final OpcodeDesc structure in |
| * the list of opcodes for a given mnemonic. |
| */ |
| char last; |
| char platf; |
| }; |
| }; |
| public: |
| /** |
| * @brief General info about mnemonic. |
| */ |
| struct MnemonicDesc { |
| /** |
| * @brief The mnemonic itself. |
| */ |
| Mnemonic mn; |
| /** |
| * Various characteristics of mnemonic. |
| * @see MF_ |
| */ |
| unsigned flags; |
| /** |
| * @brief Operation's operand's count and roles. |
| * |
| * For the operations whose opcodes may use different number of |
| * operands (i.e. IMUL/SHL) either most common value used, or empty |
| * value left. |
| */ |
| OpndRolesDesc roles; |
| /** |
| * @brief Print name of the mnemonic. |
| */ |
| const char * name; |
| }; |
| |
| |
/**
 * @brief Magic number, shows the maximum value a hash code can take.
 *
 * For meaning and arithmetics see enc_tabl.cpp.
 *
 * The value was increased from '5155' to '8192' to make it aligned
 * for faster access in EncoderBase::lookup().
 */
static const unsigned HASH_MAX = 8192;
/**
 * @brief Empty value, used in the hash-to-opcode map to show an empty slot.
 */
static const unsigned char NOHASH = 0xFF;
/**
 * @brief Number of bits the combined hash is shifted by for each operand
 *        added to an Operands container.
 */
static const unsigned char HASH_BITS_PER_OPERAND = 5;
| |
| /** |
| * @brief Contains info about a single instructions's operand - its |
| * location, size and a value for immediate or RegName for |
| * register operands. |
| */ |
| class Operand { |
| public: |
| /** |
| * @brief Initializes the instance with empty size and kind. |
| */ |
| Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {} |
| /** |
| * @brief Creates register operand from given RegName. |
| */ |
| Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)), |
| m_size(getRegSize(reg)), |
| m_ext(ext), m_reg(reg) |
| { |
| hash_it(); |
| } |
| /** |
| * @brief Creates register operand from given RegName and with the |
| * specified size and kind. |
| * |
| * Used to speedup Operand creation as there is no need to extract |
| * size and kind from the RegName. |
| * The provided size and kind must match the RegName's ones though. |
| */ |
| Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) : |
| m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg) |
| { |
| assert(m_size == getRegSize(reg)); |
| assert(m_kind == getRegKind(reg)); |
| hash_it(); |
| } |
| /** |
| * @brief Creates immediate operand with the given size and value. |
| */ |
| Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) : |
| m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival) |
| { |
| hash_it(); |
| } |
| /** |
| * @brief Creates immediate operand of OpndSize_32. |
| */ |
| Operand(int ival, OpndExt ext = OpndExt_None) : |
| m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival) |
| { |
| hash_it(); |
| } |
| /** |
| * @brief Creates immediate operand of OpndSize_16. |
| */ |
| Operand(short ival, OpndExt ext = OpndExt_None) : |
| m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival) |
| { |
| hash_it(); |
| } |
| |
| /** |
| * @brief Creates immediate operand of OpndSize_8. |
| */ |
| Operand(char ival, OpndExt ext = OpndExt_None) : |
| m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival) |
| { |
| hash_it(); |
| } |
| |
| /** |
| * @brief Creates memory operand. |
| */ |
| Operand(OpndSize size, RegName base, RegName index, unsigned scale, |
| int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext) |
| { |
| m_base = base; |
| m_index = index; |
| m_scale = scale; |
| m_disp = disp; |
| hash_it(); |
| } |
| |
| /** |
| * @brief Creates memory operand with only base and displacement. |
| */ |
| Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) : |
| m_kind(OpndKind_Mem), m_size(size), m_ext(ext) |
| { |
| m_base = base; |
| m_index = RegName_Null; |
| m_scale = 0; |
| m_disp = disp; |
| hash_it(); |
| } |
| // |
| // general info |
| // |
| /** |
| * @brief Returns kind of the operand. |
| */ |
| OpndKind kind(void) const { return m_kind; } |
| /** |
| * @brief Returns size of the operand. |
| */ |
| OpndSize size(void) const { return m_size; } |
| /** |
| * @brief Returns extention of the operand. |
| */ |
| OpndExt ext(void) const { return m_ext; } |
| /** |
| * @brief Returns hash of the operand. |
| */ |
| unsigned hash(void) const { return m_hash; } |
| // |
| #ifdef _EM64T_ |
| bool need_rex(void) const { return m_need_rex; } |
| #else |
| bool need_rex(void) const { return false; } |
| #endif |
| /** |
| * @brief Tests whether operand is memory operand. |
| */ |
| bool is_mem(void) const { return is_placed_in(OpndKind_Mem); } |
| /** |
| * @brief Tests whether operand is immediate operand. |
| */ |
| bool is_imm(void) const { return is_placed_in(OpndKind_Imm); } |
| /** |
| * @brief Tests whether operand is register operand. |
| */ |
| bool is_reg(void) const { return is_placed_in(OpndKind_Reg); } |
| /** |
| * @brief Tests whether operand is general-purpose register operand. |
| */ |
| bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); } |
| /** |
| * @brief Tests whether operand is float-point pseudo-register operand. |
| */ |
| bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); } |
| /** |
| * @brief Tests whether operand is XMM register operand. |
| */ |
| bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); } |
| #ifdef _HAVE_MMX_ |
| /** |
| * @brief Tests whether operand is MMX register operand. |
| */ |
| bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); } |
| #endif |
| /** |
| * @brief Tests whether operand is signed immediate operand. |
| */ |
| //bool is_signed(void) const { assert(is_imm()); return m_is_signed; } |
| |
| /** |
| * @brief Returns base of memory operand (RegName_Null if not memory). |
| */ |
| RegName base(void) const { return is_mem() ? m_base : RegName_Null; } |
| /** |
| * @brief Returns index of memory operand (RegName_Null if not memory). |
| */ |
| RegName index(void) const { return is_mem() ? m_index : RegName_Null; } |
| /** |
| * @brief Returns scale of memory operand (0 if not memory). |
| */ |
| unsigned scale(void) const { return is_mem() ? m_scale : 0; } |
| /** |
| * @brief Returns displacement of memory operand (0 if not memory). |
| */ |
| int disp(void) const { return is_mem() ? m_disp : 0; } |
| /** |
| * @brief Returns RegName of register operand (RegName_Null if not |
| * register). |
| */ |
| RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; } |
| /** |
| * @brief Returns value of immediate operand (0 if not immediate). |
| */ |
| long long imm(void) const { return is_imm() ? m_imm64 : 0; } |
| private: |
| bool is_placed_in(OpndKind kd) const |
| { |
| return kd == OpndKind_Reg ? |
| m_kind == OpndKind_GPReg || |
| #ifdef _HAVE_MMX_ |
| m_kind == OpndKind_MMXReg || |
| #endif |
| m_kind == OpndKind_FPReg || |
| m_kind == OpndKind_XMMReg |
| : kd == m_kind; |
| } |
| void hash_it(void) |
| { |
| m_hash = get_size_hash(m_size) | get_kind_hash(m_kind); |
| #ifdef _EM64T_ |
| m_need_rex = false; |
| if (is_reg() && is_em64t_extra_reg(m_reg)) { |
| m_need_rex = true; |
| } |
| else if (is_mem() && (is_em64t_extra_reg(m_base) || |
| is_em64t_extra_reg(m_index))) { |
| m_need_rex = true; |
| } |
| #endif |
| } |
| // general info |
| OpndKind m_kind; |
| OpndSize m_size; |
| OpndExt m_ext; |
| // complex address form support |
| RegName m_base; |
| RegName m_index; |
| unsigned m_scale; |
| union { |
| int m_disp; |
| RegName m_reg; |
| long long m_imm64; |
| }; |
| unsigned m_hash; |
| bool m_need_rex; |
| friend class EncoderBase::Operands; |
| }; |
| /** |
| * @brief Simple container for up to 3 Operand-s. |
| */ |
| class Operands { |
| public: |
| Operands(void) |
| { |
| clear(); |
| } |
| Operands(const Operand& op0) |
| { |
| clear(); |
| add(op0); |
| } |
| |
| Operands(const Operand& op0, const Operand& op1) |
| { |
| clear(); |
| add(op0); add(op1); |
| } |
| |
| Operands(const Operand& op0, const Operand& op1, const Operand& op2) |
| { |
| clear(); |
| add(op0); add(op1); add(op2); |
| } |
| |
| unsigned count(void) const { return m_count; } |
| unsigned hash(void) const { return m_hash; } |
| const Operand& operator[](unsigned idx) const |
| { |
| assert(idx<m_count); |
| return m_operands[idx]; |
| } |
| |
| void add(const Operand& op) |
| { |
| assert(m_count < COUNTOF(m_operands)); |
| m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash(); |
| m_operands[m_count++] = op; |
| m_need_rex = m_need_rex || op.m_need_rex; |
| } |
| #ifdef _EM64T_ |
| bool need_rex(void) const { return m_need_rex; } |
| #else |
| bool need_rex(void) const { return false; } |
| #endif |
| void clear(void) |
| { |
| m_count = 0; m_hash = 0; m_need_rex = false; |
| } |
| private: |
| unsigned m_count; |
| Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )]; |
| unsigned m_hash; |
| bool m_need_rex; |
| }; |
| public: |
| #ifdef _DEBUG |
| /** |
| * Verifies some presumptions about encoding data table. |
| * Called automatically during statics initialization. |
| */ |
| static int verify(void); |
| #endif |
| |
| private: |
| /** |
| * @brief Returns found OpcodeDesc by the given Mnemonic and operands. |
| */ |
| static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds); |
| /** |
| * @brief Encodes mod/rm byte. |
| */ |
| static char* encodeModRM(char* stream, const Operands& opnds, |
| unsigned idx, const OpcodeDesc * odesc, Rex * prex); |
| /** |
| * @brief Encodes special things of opcode description - '/r', 'ib', etc. |
| */ |
| static char* encode_aux(char* stream, unsigned aux, |
| const Operands& opnds, const OpcodeDesc * odesc, |
| unsigned * pargsCount, Rex* prex); |
| #ifdef _EM64T_ |
| /** |
| * @brief Returns true if the 'reg' argument represents one of the new |
| * EM64T registers - R8(D)-R15(D). |
| * |
| * The 64 bits versions of 'old-fashion' registers, i.e. RAX are not |
| * considered as 'extra'. |
| */ |
| static bool is_em64t_extra_reg(const RegName reg) |
| { |
| if (needs_rex_r(reg)) { |
| return true; |
| } |
| if (RegName_SPL <= reg && reg <= RegName_R15L) { |
| return true; |
| } |
| return false; |
| } |
| static bool needs_rex_r(const RegName reg) |
| { |
| if (RegName_R8 <= reg && reg <= RegName_R15) { |
| return true; |
| } |
| if (RegName_R8D <= reg && reg <= RegName_R15D) { |
| return true; |
| } |
| if (RegName_R8S <= reg && reg <= RegName_R15S) { |
| return true; |
| } |
| if (RegName_R8L <= reg && reg <= RegName_R15L) { |
| return true; |
| } |
| if (RegName_XMM8 <= reg && reg <= RegName_XMM15) { |
| return true; |
| } |
| if (RegName_XMM8D <= reg && reg <= RegName_XMM15D) { |
| return true; |
| } |
| if (RegName_XMM8S <= reg && reg <= RegName_XMM15S) { |
| return true; |
| } |
| return false; |
| } |
| /** |
| * @brief Returns an 'processor's index' of the register - the index |
| * used to encode the register in ModRM/SIB bytes. |
| * |
| * For the new EM64T registers the 'HW index' differs from the index |
| * encoded in RegName. For old-fashion registers it's effectively the |
| * same as ::getRegIndex(RegName). |
| */ |
| static unsigned char getHWRegIndex(const RegName reg) |
| { |
| if (getRegKind(reg) != OpndKind_GPReg) { |
| return getRegIndex(reg); |
| } |
| if (RegName_SPL <= reg && reg<=RegName_DIL) { |
| return getRegIndex(reg); |
| } |
| if (RegName_R8L<= reg && reg<=RegName_R15L) { |
| return getRegIndex(reg) - getRegIndex(RegName_R8L); |
| } |
| return is_em64t_extra_reg(reg) ? |
| getRegIndex(reg)-getRegIndex(RegName_R8D) : getRegIndex(reg); |
| } |
| #else |
| static unsigned char getHWRegIndex(const RegName reg) |
| { |
| return getRegIndex(reg); |
| } |
| static bool is_em64t_extra_reg(const RegName reg) |
| { |
| return false; |
| } |
| #endif |
| public: |
| static unsigned char get_size_hash(OpndSize size) { |
| return (size <= OpndSize_64) ? size_hash[size] : 0xFF; |
| } |
| static unsigned char get_kind_hash(OpndKind kind) { |
| return (kind <= OpndKind_Mem) ? kind_hash[kind] : 0xFF; |
| } |
| |
| /** |
| * @brief A table used for the fast computation of hash value. |
| * |
| * A change must be strictly balanced with hash-related functions and data |
| * in enc_base.h/.cpp. |
| */ |
| static const unsigned char size_hash[OpndSize_64+1]; |
| /** |
| * @brief A table used for the fast computation of hash value. |
| * |
| * A change must be strictly balanced with hash-related functions and data |
| * in enc_base.h/.cpp. |
| */ |
| static const unsigned char kind_hash[OpndKind_Mem+1]; |
| /** |
| * @brief Maximum number of opcodes used for a single mnemonic. |
| * |
| * No arithmetics behind the number, simply estimated. |
| */ |
| static const unsigned int MAX_OPCODES = 32; //20; |
| /** |
| * @brief Mapping between operands hash code and operands. |
| */ |
| static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX]; |
| /** |
| * @brief Array of mnemonics. |
| */ |
| static MnemonicDesc mnemonics[Mnemonic_Count]; |
| /** |
| * @brief Array of available opcodes. |
| */ |
| static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES]; |
| |
| static int buildTable(void); |
| static void buildMnemonicDesc(const MnemonicInfo * minfo); |
| /** |
| * @brief Computes hash value for the given operands. |
| */ |
| static unsigned short getHash(const OpcodeInfo* odesc); |
| /** |
| * @brief Dummy variable, for automatic invocation of buildTable() at |
| * startup. |
| */ |
| static int dummy; |
| |
| static char * curRelOpnd[3]; |
| }; |
| |
| ENCODER_NAMESPACE_END |
| |
| #endif // ifndef __ENC_BASE_H_INCLUDED__ |