/*
* Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.graalvm.compiler.asm.amd64;
import static jdk.vm.ci.amd64.AMD64.CPU;
import static jdk.vm.ci.amd64.AMD64.XMM;
import static jdk.vm.ci.amd64.AMD64.r12;
import static jdk.vm.ci.amd64.AMD64.r13;
import static jdk.vm.ci.amd64.AMD64.rbp;
import static jdk.vm.ci.amd64.AMD64.rip;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.code.MemoryBarriers.STORE_LOAD;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseAddressNop;
import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseNormalNop;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.ADD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.CMP;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SBB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.XOR;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.DEC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.INC;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NEG;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64MOp.NOT;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.BYTE;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.DWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.PS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.QWORD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SD;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.SS;
import static org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize.WORD;
import static org.graalvm.compiler.core.common.NumUtil.isByte;
import static org.graalvm.compiler.core.common.NumUtil.isInt;
import static org.graalvm.compiler.core.common.NumUtil.isShiftCount;
import static org.graalvm.compiler.core.common.NumUtil.isUByte;
import org.graalvm.compiler.asm.Assembler;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.debug.GraalError;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64.CPUFeature;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.Register.RegisterCategory;
import jdk.vm.ci.code.TargetDescription;
import jdk.vm.ci.meta.PlatformKind;
/**
* This class implements an assembler that can encode most X86 instructions.
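* <p>
* A minimal usage sketch (assuming a valid {@link TargetDescription} for an AMD64 target):
*
* <pre>
* AMD64Assembler asm = new AMD64Assembler(target);
* asm.addl(AMD64.rax, 42);        // emits 83 C0 2A (sign-extended imm8 form)
* asm.addl(AMD64.rax, AMD64.rbx); // emits 03 C3
* </pre>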
*/
public class AMD64Assembler extends Assembler {
private static final int MinEncodingNeedsRex = 8;
/**
* The x86 condition codes used for conditional jumps/moves.
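* For example, {@code Less.negate()} yields {@code GreaterEqual}; the encoded value of each
* flag is the low nibble added to the base opcode of the Jcc/CMOVcc/SETcc instructions.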
*/
public enum ConditionFlag {
Zero(0x4, "|zero|"),
NotZero(0x5, "|nzero|"),
Equal(0x4, "="),
NotEqual(0x5, "!="),
Less(0xc, "<"),
LessEqual(0xe, "<="),
Greater(0xf, ">"),
GreaterEqual(0xd, ">="),
Below(0x2, "|<|"),
BelowEqual(0x6, "|<=|"),
Above(0x7, "|>|"),
AboveEqual(0x3, "|>=|"),
Overflow(0x0, "|of|"),
NoOverflow(0x1, "|nof|"),
CarrySet(0x2, "|carry|"),
CarryClear(0x3, "|ncarry|"),
Negative(0x8, "|neg|"),
Positive(0x9, "|pos|"),
Parity(0xa, "|par|"),
NoParity(0xb, "|npar|");
private final int value;
private final String operator;
ConditionFlag(int value, String operator) {
this.value = value;
this.operator = operator;
}
public ConditionFlag negate() {
switch (this) {
case Zero:
return NotZero;
case NotZero:
return Zero;
case Equal:
return NotEqual;
case NotEqual:
return Equal;
case Less:
return GreaterEqual;
case LessEqual:
return Greater;
case Greater:
return LessEqual;
case GreaterEqual:
return Less;
case Below:
return AboveEqual;
case BelowEqual:
return Above;
case Above:
return BelowEqual;
case AboveEqual:
return Below;
case Overflow:
return NoOverflow;
case NoOverflow:
return Overflow;
case CarrySet:
return CarryClear;
case CarryClear:
return CarrySet;
case Negative:
return Positive;
case Positive:
return Negative;
case Parity:
return NoParity;
case NoParity:
return Parity;
}
throw new IllegalArgumentException();
}
public int getValue() {
return value;
}
@Override
public String toString() {
return operator;
}
}
/**
* Constants for X86 prefix bytes.
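* The REX constants encode {@code 0x40 | (W << 3) | (R << 2) | (X << 1) | B}; for example,
* {@code REXWR == 0x4C} has the W and R bits set.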
*/
private static class Prefix {
private static final int REX = 0x40;
private static final int REXB = 0x41;
private static final int REXX = 0x42;
private static final int REXXB = 0x43;
private static final int REXR = 0x44;
private static final int REXRB = 0x45;
private static final int REXRX = 0x46;
private static final int REXRXB = 0x47;
private static final int REXW = 0x48;
private static final int REXWB = 0x49;
private static final int REXWX = 0x4A;
private static final int REXWXB = 0x4B;
private static final int REXWR = 0x4C;
private static final int REXWRB = 0x4D;
private static final int REXWRX = 0x4E;
private static final int REXWRXB = 0x4F;
private static final int VEX_3BYTES = 0xC4;
private static final int VEX_2BYTES = 0xC5;
}
private static class VexPrefix {
private static final int VEX_R = 0x80;
private static final int VEX_W = 0x80;
}
private static class VexSimdPrefix {
private static final int VEX_SIMD_NONE = 0x0;
private static final int VEX_SIMD_66 = 0x1;
private static final int VEX_SIMD_F3 = 0x2;
private static final int VEX_SIMD_F2 = 0x3;
}
private static class VexOpcode {
private static final int VEX_OPCODE_NONE = 0x0;
private static final int VEX_OPCODE_0F = 0x1;
private static final int VEX_OPCODE_0F_38 = 0x2;
private static final int VEX_OPCODE_0F_3A = 0x3;
}
public static class AvxVectorLen {
public static final int AVX_128bit = 0x0;
public static final int AVX_256bit = 0x1;
public static final int AVX_512bit = 0x2;
public static final int AVX_NoVec = 0x4;
}
public static class EvexTupleType {
public static final int EVEX_FV = 0;
public static final int EVEX_HV = 4;
public static final int EVEX_FVM = 6;
public static final int EVEX_T1S = 7;
public static final int EVEX_T1F = 11;
public static final int EVEX_T2 = 13;
public static final int EVEX_T4 = 15;
public static final int EVEX_T8 = 17;
public static final int EVEX_HVM = 18;
public static final int EVEX_QVM = 19;
public static final int EVEX_OVM = 20;
public static final int EVEX_M128 = 21;
public static final int EVEX_DUP = 22;
public static final int EVEX_ETUP = 23;
}
public static class EvexInputSizeInBits {
public static final int EVEX_8bit = 0;
public static final int EVEX_16bit = 1;
public static final int EVEX_32bit = 2;
public static final int EVEX_64bit = 3;
public static final int EVEX_NObit = 4;
}
private AMD64InstructionAttr curAttributes;
AMD64InstructionAttr getCurAttributes() {
return curAttributes;
}
void setCurAttributes(AMD64InstructionAttr attributes) {
curAttributes = attributes;
}
/**
* The x86 operand sizes.
*/
public enum OperandSize {
BYTE(1, AMD64Kind.BYTE) {
@Override
protected void emitImmediate(AMD64Assembler asm, int imm) {
assert imm == (byte) imm;
asm.emitByte(imm);
}
@Override
protected int immediateSize() {
return 1;
}
},
WORD(2, AMD64Kind.WORD, 0x66) {
@Override
protected void emitImmediate(AMD64Assembler asm, int imm) {
assert imm == (short) imm;
asm.emitShort(imm);
}
@Override
protected int immediateSize() {
return 2;
}
},
DWORD(4, AMD64Kind.DWORD) {
@Override
protected void emitImmediate(AMD64Assembler asm, int imm) {
asm.emitInt(imm);
}
@Override
protected int immediateSize() {
return 4;
}
},
QWORD(8, AMD64Kind.QWORD) {
@Override
protected void emitImmediate(AMD64Assembler asm, int imm) {
asm.emitInt(imm);
}
@Override
protected int immediateSize() {
return 4;
}
},
SS(4, AMD64Kind.SINGLE, 0xF3, true),
SD(8, AMD64Kind.DOUBLE, 0xF2, true),
PS(16, AMD64Kind.V128_SINGLE, true),
PD(16, AMD64Kind.V128_DOUBLE, 0x66, true);
private final int sizePrefix;
private final int bytes;
private final boolean xmm;
private final AMD64Kind kind;
OperandSize(int bytes, AMD64Kind kind) {
this(bytes, kind, 0);
}
OperandSize(int bytes, AMD64Kind kind, int sizePrefix) {
this(bytes, kind, sizePrefix, false);
}
OperandSize(int bytes, AMD64Kind kind, boolean xmm) {
this(bytes, kind, 0, xmm);
}
OperandSize(int bytes, AMD64Kind kind, int sizePrefix, boolean xmm) {
this.sizePrefix = sizePrefix;
this.bytes = bytes;
this.kind = kind;
this.xmm = xmm;
}
public int getBytes() {
return bytes;
}
public boolean isXmmType() {
return xmm;
}
public AMD64Kind getKind() {
return kind;
}
public static OperandSize get(PlatformKind kind) {
for (OperandSize operandSize : OperandSize.values()) {
if (operandSize.kind.equals(kind)) {
return operandSize;
}
}
throw GraalError.shouldNotReachHere("Unexpected kind: " + kind.toString());
}
/**
* Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
* as sign-extended 32-bit values.
*
* @param asm the assembler used to emit the immediate
* @param imm the immediate value to emit
*/
protected void emitImmediate(AMD64Assembler asm, int imm) {
throw new UnsupportedOperationException();
}
protected int immediateSize() {
throw new UnsupportedOperationException();
}
}
/**
* Operand size and register type constraints.
*/
private enum OpAssertion {
ByteAssertion(CPU, CPU, BYTE),
ByteOrLargerAssertion(CPU, CPU, BYTE, WORD, DWORD, QWORD),
WordOrLargerAssertion(CPU, CPU, WORD, DWORD, QWORD),
DwordOrLargerAssertion(CPU, CPU, DWORD, QWORD),
WordOrDwordAssertion(CPU, CPU, WORD, QWORD),
QwordAssertion(CPU, CPU, QWORD),
FloatAssertion(XMM, XMM, SS, SD, PS, PD),
PackedFloatAssertion(XMM, XMM, PS, PD),
SingleAssertion(XMM, XMM, SS),
DoubleAssertion(XMM, XMM, SD),
PackedDoubleAssertion(XMM, XMM, PD),
IntToFloatAssertion(XMM, CPU, DWORD, QWORD),
FloatToIntAssertion(CPU, XMM, DWORD, QWORD);
private final RegisterCategory resultCategory;
private final RegisterCategory inputCategory;
private final OperandSize[] allowedSizes;
OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
this.resultCategory = resultCategory;
this.inputCategory = inputCategory;
this.allowedSizes = allowedSizes;
}
protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
for (OperandSize s : allowedSizes) {
if (size == s) {
return true;
}
}
assert false : "invalid operand size " + size + " used in " + op;
return false;
}
}
public abstract static class OperandDataAnnotation extends CodeAnnotation {
/**
* The position (bytes from the beginning of the method) of the operand.
*/
public final int operandPosition;
/**
* The size of the operand, in bytes.
*/
public final int operandSize;
/**
* The position (bytes from the beginning of the method) of the next instruction. On AMD64,
* RIP-relative operands are relative to this position.
*/
public final int nextInstructionPosition;
OperandDataAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
super(instructionPosition);
this.operandPosition = operandPosition;
this.operandSize = operandSize;
this.nextInstructionPosition = nextInstructionPosition;
}
@Override
public String toString() {
return getClass().getSimpleName() + " instruction [" + instructionPosition + ", " + nextInstructionPosition + "[ operand at " + operandPosition + " size " + operandSize;
}
}
/**
* Annotation that stores additional information about the displacement of a
* {@link Assembler#getPlaceholder placeholder address} that needs patching.
*/
public static class AddressDisplacementAnnotation extends OperandDataAnnotation {
AddressDisplacementAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
}
}
/**
* Annotation that stores additional information about the immediate operand, e.g., of a call
* instruction, that needs patching.
*/
public static class ImmediateOperandAnnotation extends OperandDataAnnotation {
ImmediateOperandAnnotation(int instructionPosition, int operandPosition, int operandSize, int nextInstructionPosition) {
super(instructionPosition, operandPosition, operandSize, nextInstructionPosition);
}
}
/**
* Constructs an assembler for the AMD64 architecture.
*/
public AMD64Assembler(TargetDescription target) {
super(target);
}
public boolean supports(CPUFeature feature) {
return ((AMD64) target.arch).getFeatures().contains(feature);
}
private static int encode(Register r) {
assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
return r.encoding & 0x7;
}
/**
* Get RXB bits for a register-register instruction. In that encoding, ModRM.rm contains a
* register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
* field. The X bit must be 0.
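* For example, {@code getRXB(r9, rax)} yields {@code 0b100} (only R set), so the resulting
* REX prefix is {@code 0x44} ({@code REXR}).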
*/
protected static int getRXB(Register reg, Register rm) {
int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
return rxb;
}
/**
* Get RXB bits for a register-memory instruction. The R bit extends the ModRM.reg field. There
* are two cases for the memory operand:<br>
* ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.
* <br>
* There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
*/
protected static int getRXB(Register reg, AMD64Address rm) {
int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
if (!rm.getIndex().equals(Register.None)) {
rxb |= (rm.getIndex().encoding & 0x08) >> 2;
}
if (!rm.getBase().equals(Register.None)) {
rxb |= (rm.getBase().encoding & 0x08) >> 3;
}
return rxb;
}
/**
* Emit the ModR/M byte for one register operand and an opcode extension in the R field.
* <p>
* Format: [ 11 reg r/m ]
*/
protected void emitModRM(int reg, Register rm) {
assert (reg & 0x07) == reg;
emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
}
/**
* Emit the ModR/M byte for two register operands.
* <p>
* Format: [ 11 reg r/m ]
*/
protected void emitModRM(Register reg, Register rm) {
emitModRM(reg.encoding & 0x07, rm);
}
protected void emitOperandHelper(Register reg, AMD64Address addr, int additionalInstructionSize) {
assert !reg.equals(Register.None);
emitOperandHelper(encode(reg), addr, false, additionalInstructionSize);
}
/**
* Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
*
* @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
*/
protected void emitOperandHelper(Register reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
assert !reg.equals(Register.None);
emitOperandHelper(encode(reg), addr, force4Byte, additionalInstructionSize);
}
protected void emitOperandHelper(int reg, AMD64Address addr, int additionalInstructionSize) {
emitOperandHelper(reg, addr, false, additionalInstructionSize);
}
/**
* Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
* extension in the R field.
*
* @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
* @param additionalInstructionSize the number of bytes that will be emitted after the operand,
* so that the start position of the next instruction can be computed even though
* this instruction has not been completely emitted yet.
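* <p>
* For example, with {@code reg == 0} (rax) and the address {@code [rbx + rcx*4 + 8]}, this
* method emits ModRM {@code 0x44}, SIB {@code 0x8B}, and the disp8 byte {@code 0x08}.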
*/
protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
assert (reg & 0x07) == reg;
int regenc = reg << 3;
Register base = addr.getBase();
Register index = addr.getIndex();
AMD64Address.Scale scale = addr.getScale();
int disp = addr.getDisplacement();
if (base.equals(AMD64.rip)) { // also matches addresses returned by getPlaceholder()
// [00 000 101] disp32
assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
emitByte(0x05 | regenc);
if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
}
emitInt(disp);
} else if (base.isValid()) {
int baseenc = encode(base);
if (index.isValid()) {
int indexenc = encode(index) << 3;
// [base + indexscale + disp]
if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
// [base + indexscale]
// [00 reg 100][ss index base]
assert !index.equals(rsp) : "illegal addressing mode";
emitByte(0x04 | regenc);
emitByte(scale.log2 << 6 | indexenc | baseenc);
} else if (isByte(disp) && !force4Byte) {
// [base + indexscale + imm8]
// [01 reg 100][ss index base] imm8
assert !index.equals(rsp) : "illegal addressing mode";
emitByte(0x44 | regenc);
emitByte(scale.log2 << 6 | indexenc | baseenc);
emitByte(disp & 0xFF);
} else {
// [base + indexscale + disp32]
// [10 reg 100][ss index base] disp32
assert !index.equals(rsp) : "illegal addressing mode";
emitByte(0x84 | regenc);
emitByte(scale.log2 << 6 | indexenc | baseenc);
emitInt(disp);
}
} else if (base.equals(rsp) || base.equals(r12)) {
// [rsp + disp]
if (disp == 0) {
// [rsp]
// [00 reg 100][00 100 100]
emitByte(0x04 | regenc);
emitByte(0x24);
} else if (isByte(disp) && !force4Byte) {
// [rsp + imm8]
// [01 reg 100][00 100 100] disp8
emitByte(0x44 | regenc);
emitByte(0x24);
emitByte(disp & 0xFF);
} else {
// [rsp + imm32]
// [10 reg 100][00 100 100] disp32
emitByte(0x84 | regenc);
emitByte(0x24);
emitInt(disp);
}
} else {
// [base + disp]
assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
// [base]
// [00 reg base]
emitByte(0x00 | regenc | baseenc);
} else if (isByte(disp) && !force4Byte) {
// [base + disp8]
// [01 reg base] disp8
emitByte(0x40 | regenc | baseenc);
emitByte(disp & 0xFF);
} else {
// [base + disp32]
// [10 reg base] disp32
emitByte(0x80 | regenc | baseenc);
emitInt(disp);
}
}
} else {
if (index.isValid()) {
int indexenc = encode(index) << 3;
// [indexscale + disp]
// [00 reg 100][ss index 101] disp32
assert !index.equals(rsp) : "illegal addressing mode";
emitByte(0x04 | regenc);
emitByte(scale.log2 << 6 | indexenc | 0x05);
emitInt(disp);
} else {
// [disp] ABSOLUTE
// [00 reg 100][00 100 101] disp32
emitByte(0x04 | regenc);
emitByte(0x25);
emitInt(disp);
}
}
setCurAttributes(null);
}
/**
* Base class for AMD64 opcodes.
*/
public static class AMD64Op {
protected static final int P_0F = 0x0F;
protected static final int P_0F38 = 0x380F;
protected static final int P_0F3A = 0x3A0F;
private final String opcode;
protected final int prefix1;
protected final int prefix2;
protected final int op;
private final boolean dstIsByte;
private final boolean srcIsByte;
private final OpAssertion assertion;
private final CPUFeature feature;
protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
}
protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
this.opcode = opcode;
this.prefix1 = prefix1;
this.prefix2 = prefix2;
this.op = op;
this.dstIsByte = dstIsByte;
this.srcIsByte = srcIsByte;
this.assertion = assertion;
this.feature = feature;
}
protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
if (prefix1 != 0) {
asm.emitByte(prefix1);
}
if (size.sizePrefix != 0) {
asm.emitByte(size.sizePrefix);
}
int rexPrefix = 0x40 | rxb;
if (size == QWORD) {
rexPrefix |= 0x08;
}
if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
asm.emitByte(rexPrefix);
}
if (prefix2 > 0xFF) {
asm.emitShort(prefix2);
} else if (prefix2 > 0) {
asm.emitByte(prefix2);
}
asm.emitByte(op);
}
protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
assert assertion.checkOperands(this, size, resultReg, inputReg);
return true;
}
@Override
public String toString() {
return opcode;
}
}
/**
* Base class for AMD64 opcodes with immediate operands.
*/
public static class AMD64ImmOp extends AMD64Op {
private final boolean immIsByte;
protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
super(opcode, 0, prefix, op, assertion, null);
this.immIsByte = immIsByte;
}
protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
if (immIsByte) {
assert imm == (byte) imm;
asm.emitByte(imm);
} else {
size.emitImmediate(asm, imm);
}
}
protected final int immediateSize(OperandSize size) {
if (immIsByte) {
return 1;
} else {
return size.bytes;
}
}
}
/**
* Opcode with operand order of either RM or MR for 2 address forms.
*/
public abstract static class AMD64RROp extends AMD64Op {
protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, assertion, feature);
}
protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
}
public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
}
/**
* Opcode with operand order of either RM or MR for 3 address forms.
*/
public abstract static class AMD64RRROp extends AMD64Op {
protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, assertion, feature);
}
protected AMD64RRROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
}
public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src);
}
/**
* Opcode with operand order of RM.
*/
public static class AMD64RMOp extends AMD64RROp {
// @formatter:off
public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF, OpAssertion.ByteOrLargerAssertion);
public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, CPUFeature.BMI1);
public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.WordOrLargerAssertion);
public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.DwordOrLargerAssertion);
public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.WordOrLargerAssertion);
public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.DwordOrLargerAssertion);
public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordAssertion);
public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
public static final AMD64RMOp CMP = new AMD64RMOp("CMP", 0x3B);
// MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatAssertion, CPUFeature.SSE);
// TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
// @formatter:on
protected AMD64RMOp(String opcode, int op) {
this(opcode, 0, op);
}
protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
this(opcode, 0, op, assertion);
}
protected AMD64RMOp(String opcode, int prefix, int op) {
this(opcode, 0, prefix, op, null);
}
protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
this(opcode, 0, prefix, op, assertion, null);
}
protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
this(opcode, 0, prefix, op, assertion, feature);
}
protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
}
protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
}
protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, assertion, feature);
}
@Override
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
assert verify(asm, size, dst, src);
boolean isSimd = false;
boolean noNds = false;
switch (op) {
case 0x2A:
case 0x2C:
case 0x2E:
case 0x5A:
case 0x6E:
isSimd = true;
noNds = true;
break;
case 0x10:
case 0x51:
case 0x54:
case 0x55:
case 0x56:
case 0x57:
case 0x58:
case 0x59:
case 0x5C:
case 0x5D:
case 0x5E:
case 0x5F:
isSimd = true;
break;
}
int opc = 0;
if (isSimd) {
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
opc = VexOpcode.VEX_OPCODE_NONE;
isSimd = false;
break;
}
}
if (isSimd) {
int pre;
boolean rexVexW = size == QWORD;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
int encode;
if (noNds) {
encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
} else {
encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
}
asm.emitByte(op);
asm.emitByte(0xC0 | encode);
} else {
emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
asm.emitModRM(dst, src);
}
}
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
assert verify(asm, size, dst, null);
boolean isSimd = false;
boolean noNds = false;
switch (op) {
case 0x10:
case 0x2A:
case 0x2C:
case 0x2E:
case 0x6E:
isSimd = true;
noNds = true;
break;
case 0x51:
case 0x54:
case 0x55:
case 0x56:
case 0x57:
case 0x58:
case 0x59:
case 0x5C:
case 0x5D:
case 0x5E:
case 0x5F:
isSimd = true;
break;
}
int opc = 0;
if (isSimd) {
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
isSimd = false;
break;
}
}
if (isSimd) {
int pre;
boolean rexVexW = size == QWORD;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
if (noNds) {
asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
} else {
asm.simdPrefix(dst, dst, src, pre, opc, attributes);
}
asm.emitByte(op);
asm.emitOperandHelper(dst, src, 0);
} else {
emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
asm.emitOperandHelper(dst, src, 0);
}
}
}
/**
* Opcode with operand order of RRM for 3 address forms.
*/
public static class AMD64RRMOp extends AMD64RRROp {
protected AMD64RRMOp(String opcode, int op) {
this(opcode, 0, op);
}
protected AMD64RRMOp(String opcode, int op, OpAssertion assertion) {
this(opcode, 0, op, assertion);
}
protected AMD64RRMOp(String opcode, int prefix, int op) {
this(opcode, 0, prefix, op, null);
}
protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion) {
this(opcode, 0, prefix, op, assertion, null);
}
protected AMD64RRMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
this(opcode, 0, prefix, op, assertion, feature);
}
protected AMD64RRMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
}
protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
this(opcode, prefix1, prefix2, op, OpAssertion.WordOrLargerAssertion, feature);
}
protected AMD64RRMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, assertion, feature);
}
@Override
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, Register src) {
assert verify(asm, size, dst, src);
int pre;
int opc;
boolean rexVexW = size == QWORD;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
throw GraalError.shouldNotReachHere("invalid VEX instruction prefix");
}
int encode;
encode = asm.simdPrefixAndEncode(dst, nds, src, pre, opc, attributes);
asm.emitByte(op);
asm.emitByte(0xC0 | encode);
}
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register nds, AMD64Address src) {
assert verify(asm, size, dst, null);
int pre;
int opc;
boolean rexVexW = size == QWORD;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
throw GraalError.shouldNotReachHere("invalid VEX instruction prefix");
}
asm.simdPrefix(dst, nds, src, pre, opc, attributes);
asm.emitByte(op);
asm.emitOperandHelper(dst, src, 0);
}
}
/**
* Opcode with operand order of MR.
*/
public static class AMD64MROp extends AMD64RROp {
// @formatter:off
public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);
// MOVD and MOVQ are the same opcode, just with different operand size prefix
// Note that as MR opcodes, they have reverse operand order, so the IntToFloatAssertion must be used.
public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatAssertion, CPUFeature.SSE2);
// MOVSS and MOVSD are the same opcode, just with different operand size prefix
public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatAssertion, CPUFeature.SSE);
// @formatter:on
protected AMD64MROp(String opcode, int op) {
this(opcode, 0, op);
}
protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
this(opcode, 0, op, assertion);
}
protected AMD64MROp(String opcode, int prefix, int op) {
this(opcode, prefix, op, OpAssertion.WordOrLargerAssertion);
}
protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
this(opcode, prefix, op, assertion, null);
}
protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
this(opcode, 0, prefix, op, assertion, feature);
}
protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
super(opcode, prefix1, prefix2, op, assertion, feature);
}
@Override
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
assert verify(asm, size, src, dst);
boolean isSimd = false;
boolean noNds = false;
switch (op) {
case 0x7E:
isSimd = true;
noNds = true;
break;
case 0x11:
isSimd = true;
break;
}
int opc = 0;
if (isSimd) {
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
isSimd = false;
break;
}
}
if (isSimd) {
int pre;
boolean rexVexW = size == QWORD;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
int encode;
if (noNds) {
encode = asm.simdPrefixAndEncode(src, Register.None, dst, pre, opc, attributes);
} else {
encode = asm.simdPrefixAndEncode(src, src, dst, pre, opc, attributes);
}
asm.emitByte(op);
asm.emitByte(0xC0 | encode);
} else {
emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
asm.emitModRM(src, dst);
}
}
public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
assert verify(asm, size, null, src);
boolean isSimd = false;
switch (op) {
case 0x7E:
case 0x11:
isSimd = true;
break;
}
int opc = 0;
if (isSimd) {
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
isSimd = false;
break;
}
}
if (isSimd) {
int pre;
boolean rexVexW = size == QWORD;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, rexVexW, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
asm.simdPrefix(src, Register.None, dst, pre, opc, attributes);
asm.emitByte(op);
asm.emitOperandHelper(src, dst, 0);
} else {
emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
asm.emitOperandHelper(src, dst, 0);
}
}
}
/**
* Opcodes with operand order of M.
*/
public static class AMD64MOp extends AMD64Op {
// @formatter:off
public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.WordOrDwordAssertion);
// @formatter:on
private final int ext;
protected AMD64MOp(String opcode, int op, int ext) {
this(opcode, 0, op, ext);
}
protected AMD64MOp(String opcode, int prefix, int op, int ext) {
this(opcode, prefix, op, ext, OpAssertion.WordOrLargerAssertion);
}
protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
this(opcode, 0, op, ext, assertion);
}
protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
super(opcode, 0, prefix, op, assertion, null);
this.ext = ext;
}
public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
assert verify(asm, size, dst, null);
emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
asm.emitModRM(ext, dst);
}
public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
assert verify(asm, size, null, null);
emitOpcode(asm, size, getRXB(null, dst), 0, 0);
asm.emitOperandHelper(ext, dst, 0);
}
}
/**
* Opcodes with operand order of MI.
*/
public static class AMD64MIOp extends AMD64ImmOp {
// @formatter:off
public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
// @formatter:on
private final int ext;
protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
this(opcode, immIsByte, op, ext, OpAssertion.WordOrLargerAssertion);
}
protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
this(opcode, immIsByte, 0, op, ext, assertion);
}
protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
super(opcode, immIsByte, prefix, op, assertion);
this.ext = ext;
}
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
assert verify(asm, size, dst, null);
emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
asm.emitModRM(ext, dst);
emitImmediate(asm, size, imm);
}
public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
assert verify(asm, size, null, null);
emitOpcode(asm, size, getRXB(null, dst), 0, 0);
asm.emitOperandHelper(ext, dst, immediateSize(size));
emitImmediate(asm, size, imm);
}
}
/**
* Opcodes with operand order of RMI.
*
* We provide only one form of the ROUND operations because they always operate on a single
* source operand, which makes an extension to 3 address forms redundant.
*/
public static class AMD64RMIOp extends AMD64ImmOp {
// @formatter:off
public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
public static final AMD64RMIOp ROUNDSS = new AMD64RMIOp("ROUNDSS", true, P_0F3A, 0x0A, OpAssertion.PackedDoubleAssertion);
public static final AMD64RMIOp ROUNDSD = new AMD64RMIOp("ROUNDSD", true, P_0F3A, 0x0B, OpAssertion.PackedDoubleAssertion);
// @formatter:on
protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
this(opcode, immIsByte, 0, op, OpAssertion.WordOrLargerAssertion);
}
protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
super(opcode, immIsByte, prefix, op, assertion);
}
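// For example, IMUL_SX.emit(asm, DWORD, rax, rbx, 8) assembles "imul eax, ebx, 8" and emits
// the bytes 6B C3 08.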
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
assert verify(asm, size, dst, src);
boolean isSimd = false;
boolean noNds = false;
switch (op) {
case 0x0A:
case 0x0B:
isSimd = true;
noNds = true;
break;
}
int opc = 0;
if (isSimd) {
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
isSimd = false;
break;
}
}
if (isSimd) {
int pre;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
int encode;
if (noNds) {
encode = asm.simdPrefixAndEncode(dst, Register.None, src, pre, opc, attributes);
} else {
encode = asm.simdPrefixAndEncode(dst, dst, src, pre, opc, attributes);
}
asm.emitByte(op);
asm.emitByte(0xC0 | encode);
emitImmediate(asm, size, imm);
} else {
emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
asm.emitModRM(dst, src);
emitImmediate(asm, size, imm);
}
}
public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
assert verify(asm, size, dst, null);
boolean isSimd = false;
boolean noNds = false;
switch (op) {
case 0x0A:
case 0x0B:
isSimd = true;
noNds = true;
break;
}
int opc = 0;
if (isSimd) {
switch (prefix2) {
case P_0F:
opc = VexOpcode.VEX_OPCODE_0F;
break;
case P_0F38:
opc = VexOpcode.VEX_OPCODE_0F_38;
break;
case P_0F3A:
opc = VexOpcode.VEX_OPCODE_0F_3A;
break;
default:
isSimd = false;
break;
}
}
if (isSimd) {
int pre;
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, asm.target);
int curPrefix = size.sizePrefix | prefix1;
switch (curPrefix) {
case 0x66:
pre = VexSimdPrefix.VEX_SIMD_66;
break;
case 0xF2:
pre = VexSimdPrefix.VEX_SIMD_F2;
break;
case 0xF3:
pre = VexSimdPrefix.VEX_SIMD_F3;
break;
default:
pre = VexSimdPrefix.VEX_SIMD_NONE;
break;
}
if (noNds) {
asm.simdPrefix(dst, Register.None, src, pre, opc, attributes);
} else {
asm.simdPrefix(dst, dst, src, pre, opc, attributes);
}
asm.emitByte(op);
asm.emitOperandHelper(dst, src, immediateSize(size));
emitImmediate(asm, size, imm);
} else {
emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
asm.emitOperandHelper(dst, src, immediateSize(size));
emitImmediate(asm, size, imm);
}
}
}
public static class SSEOp extends AMD64RMOp {
// @formatter:off
public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatAssertion);
public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatToIntAssertion);
public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatAssertion);
public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
// @formatter:on
protected SSEOp(String opcode, int prefix, int op) {
this(opcode, prefix, op, OpAssertion.FloatAssertion);
}
protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
this(opcode, 0, prefix, op, assertion);
}
protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
}
}
public static class AVXOp extends AMD64RRMOp {
// @formatter:off
public static final AVXOp AND = new AVXOp("AND", P_0F, 0x54, OpAssertion.PackedFloatAssertion);
public static final AVXOp ANDN = new AVXOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatAssertion);
public static final AVXOp OR = new AVXOp("OR", P_0F, 0x56, OpAssertion.PackedFloatAssertion);
public static final AVXOp XOR = new AVXOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatAssertion);
public static final AVXOp ADD = new AVXOp("ADD", P_0F, 0x58);
public static final AVXOp MUL = new AVXOp("MUL", P_0F, 0x59);
public static final AVXOp SUB = new AVXOp("SUB", P_0F, 0x5C);
public static final AVXOp MIN = new AVXOp("MIN", P_0F, 0x5D);
public static final AVXOp DIV = new AVXOp("DIV", P_0F, 0x5E);
public static final AVXOp MAX = new AVXOp("MAX", P_0F, 0x5F);
// @formatter:on
protected AVXOp(String opcode, int prefix, int op) {
this(opcode, prefix, op, OpAssertion.FloatAssertion);
}
protected AVXOp(String opcode, int prefix, int op, OpAssertion assertion) {
this(opcode, 0, prefix, op, assertion);
}
protected AVXOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.AVX);
}
}
/**
* Arithmetic operation with operand order of RM, MR or MI.
*/
public static final class AMD64BinaryArithmetic {
// @formatter:off
public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
// @formatter:on
private final AMD64MIOp byteImmOp;
private final AMD64MROp byteMrOp;
private final AMD64RMOp byteRmOp;
private final AMD64MIOp immOp;
private final AMD64MIOp immSxOp;
private final AMD64MROp mrOp;
private final AMD64RMOp rmOp;
private AMD64BinaryArithmetic(String opcode, int code) {
int baseOp = code << 3;
byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.WordOrLargerAssertion);
immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.WordOrLargerAssertion);
mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.WordOrLargerAssertion);
rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.WordOrLargerAssertion);
}
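// The ModRM extension field reuses the arithmetic code, and the base opcode is code << 3;
// e.g. for SUB (code 5) the rmOp is 0x2B, the mrOp is 0x29, and the sign-extended imm8 form
// is 0x83 /5.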
public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
if (size == BYTE) {
return byteImmOp;
} else if (sx) {
return immSxOp;
} else {
return immOp;
}
}
public AMD64MROp getMROpcode(OperandSize size) {
if (size == BYTE) {
return byteMrOp;
} else {
return mrOp;
}
}
public AMD64RMOp getRMOpcode(OperandSize size) {
if (size == BYTE) {
return byteRmOp;
} else {
return rmOp;
}
}
}
/**
* Shift operation with operand order of M1, MC or MI.
*/
public static final class AMD64Shift {
// @formatter:off
public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
// @formatter:on
public final AMD64MOp m1Op;
public final AMD64MOp mcOp;
public final AMD64MIOp miOp;
private AMD64Shift(String opcode, int code) {
m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.WordOrLargerAssertion);
mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.WordOrLargerAssertion);
miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.WordOrLargerAssertion);
}
}
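// For example, AMD64Shift.SHL.miOp.emit(asm, DWORD, rax, 5) assembles "shl eax, 5" as
// C1 E0 05, and AMD64Shift.SHL.mcOp.emit(asm, DWORD, rax) assembles "shl eax, cl" as D3 E0.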
public final void addl(AMD64Address dst, int imm32) {
ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void addl(Register dst, int imm32) {
ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void addl(Register dst, Register src) {
ADD.rmOp.emit(this, DWORD, dst, src);
}
public final void addpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x58);
emitByte(0xC0 | encode);
}
public final void addpd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x58);
emitOperandHelper(dst, src, 0);
}
public final void addsd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x58);
emitByte(0xC0 | encode);
}
public final void addsd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x58);
emitOperandHelper(dst, src, 0);
}
private void addrNop4() {
// 4 bytes: NOP DWORD PTR [EAX+0]
emitByte(0x0F);
emitByte(0x1F);
emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
emitByte(0); // 8-bit offset (1 byte)
}
private void addrNop5() {
// 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bit offset
emitByte(0x0F);
emitByte(0x1F);
emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
emitByte(0); // 8-bit offset (1 byte)
}
private void addrNop7() {
// 7 bytes: NOP DWORD PTR [EAX+0] 32-bit offset
emitByte(0x0F);
emitByte(0x1F);
emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
emitInt(0); // 32-bit offset (4 bytes)
}
private void addrNop8() {
// 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bit offset
emitByte(0x0F);
emitByte(0x1F);
emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
emitInt(0); // 32-bit offset (4 bytes)
}
public final void andl(Register dst, int imm32) {
AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void andl(Register dst, Register src) {
AND.rmOp.emit(this, DWORD, dst, src);
}
public final void andpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x54);
emitByte(0xC0 | encode);
}
public final void andpd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x54);
emitOperandHelper(dst, src, 0);
}
public final void bsfq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding(), src.encoding());
emitByte(0x0F);
emitByte(0xBC);
emitByte(0xC0 | encode);
}
public final void bsrl(Register dst, Register src) {
int encode = prefixAndEncode(dst.encoding(), src.encoding());
emitByte(0x0F);
emitByte(0xBD);
emitByte(0xC0 | encode);
}
public final void bswapl(Register reg) {
int encode = prefixAndEncode(reg.encoding);
emitByte(0x0F);
emitByte(0xC8 | encode);
}
public final void cdql() {
emitByte(0x99);
}
public final void cmovl(ConditionFlag cc, Register dst, Register src) {
int encode = prefixAndEncode(dst.encoding, src.encoding);
emitByte(0x0F);
emitByte(0x40 | cc.getValue());
emitByte(0xC0 | encode);
}
public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x0F);
emitByte(0x40 | cc.getValue());
emitOperandHelper(dst, src, 0);
}
public final void cmpl(Register dst, int imm32) {
CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void cmpl(Register dst, Register src) {
CMP.rmOp.emit(this, DWORD, dst, src);
}
public final void cmpl(Register dst, AMD64Address src) {
CMP.rmOp.emit(this, DWORD, dst, src);
}
public final void cmpl(AMD64Address dst, int imm32) {
CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
/**
* The 8-bit cmpxchg compares the value at adr with the contents of al; if they are equal, reg
* is stored into adr, otherwise the value at adr is loaded into al. The ZF flag is set if the
* compared values were equal, and cleared otherwise.
*/
public final void cmpxchgb(Register reg, AMD64Address adr) { // cmpxchg
prefixb(adr, reg);
emitByte(0x0F);
emitByte(0xB0);
emitOperandHelper(reg, adr, 0);
}
/**
* The 16-bit cmpxchg compares the value at adr with the contents of ax; if they are equal, reg
* is stored into adr, otherwise the value at adr is loaded into ax. The ZF flag is set if the
* compared values were equal, and cleared otherwise.
*/
public final void cmpxchgw(Register reg, AMD64Address adr) { // cmpxchg
emitByte(0x66); // Switch to 16-bit mode.
prefix(adr, reg);
emitByte(0x0F);
emitByte(0xB1);
emitOperandHelper(reg, adr, 0);
}
/**
 * The 32-bit cmpxchg compares the value at adr with the contents of rax; if they are equal,
 * reg is stored into adr, otherwise the value at adr is loaded into rax. The ZF flag is set if
 * the compared values were equal, and cleared otherwise.
 */
public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
prefix(adr, reg);
emitByte(0x0F);
emitByte(0xB1);
emitOperandHelper(reg, adr, 0);
}
public final void cvtsi2sdl(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x2A);
emitByte(0xC0 | encode);
}
public final void cvttsd2sil(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x2C);
emitByte(0xC0 | encode);
}
protected final void decl(AMD64Address dst) {
prefix(dst);
emitByte(0xFF);
emitOperandHelper(1, dst, 0);
}
public final void divsd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x5E);
emitByte(0xC0 | encode);
}
public final void evmovdquq(Register dst, AMD64Address src, int vectorLen) {
assert supports(CPUFeature.AVX512F);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true, target);
attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
attributes.setIsEvexInstruction();
vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6F);
emitOperandHelper(dst, src, 0);
}
public final void evpcmpeqb(Register kdst, Register nds, AMD64Address src, int vectorLen) {
assert supports(CPUFeature.AVX512BW);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
attributes.setIsEvexInstruction();
attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
vexPrefix(src, nds, kdst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x74);
emitOperandHelper(kdst, src, 0);
}
public final void hlt() {
emitByte(0xF4);
}
public final void imull(Register dst, Register src, int value) {
if (isByte(value)) {
AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
} else {
AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
}
}
protected final void incl(AMD64Address dst) {
prefix(dst);
emitByte(0xFF);
emitOperandHelper(0, dst, 0);
}
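/**
 * Emits a conditional jump to the given absolute code position. The short 2-byte form is used
 * when the displacement fits into a signed byte and {@code forceDisp32} is not set; otherwise
 * the 6-byte {@code 0x0F 0x8x} form with a 32-bit displacement is emitted. Illustrative
 * example: a branch back to a target 16 bytes before the current position has
 * {@code disp == -16}; since {@code -16 - 2 == -18} fits into a byte, the two bytes
 * {@code 0x70 | cc} and {@code 0xEE} ({@code -18 & 0xFF}) are emitted.
 */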
public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
int shortSize = 2;
int longSize = 6;
long disp = jumpTarget - position();
if (!forceDisp32 && isByte(disp - shortSize)) {
// 0111 tttn #8-bit disp
emitByte(0x70 | cc.getValue());
emitByte((int) ((disp - shortSize) & 0xFF));
} else {
// 0000 1111 1000 tttn #32-bit disp
assert isInt(disp - longSize) : "must be 32-bit offset (call4)";
emitByte(0x0F);
emitByte(0x80 | cc.getValue());
emitInt((int) (disp - longSize));
}
}
public final void jcc(ConditionFlag cc, Label l) {
assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
if (l.isBound()) {
jcc(cc, l.position(), false);
} else {
// Note: we could eliminate conditional jumps to this jump if the
// condition is the same, but that seems to be a rather unlikely case.
// Note: use jccb() if the label to be bound is very close, to get an
// 8-bit displacement.
l.addPatchAt(position());
emitByte(0x0F);
emitByte(0x80 | cc.getValue());
emitInt(0);
}
}
public final void jccb(ConditionFlag cc, Label l) {
if (l.isBound()) {
int shortSize = 2;
int entry = l.position();
assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
long disp = entry - position();
// 0111 tttn #8-bit disp
emitByte(0x70 | cc.getValue());
emitByte((int) ((disp - shortSize) & 0xFF));
} else {
l.addPatchAt(position());
emitByte(0x70 | cc.getValue());
emitByte(0);
}
}
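/**
 * Emits an unconditional jump to the given absolute code position, preferring the short
 * {@code 0xEB} form when the displacement fits into a signed byte. Illustrative example: a
 * forward jump of 10 bytes has {@code disp == 10}; {@code 10 - 2 == 8} fits into a byte, so
 * {@code 0xEB 0x08} is emitted.
 */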
public final void jmp(int jumpTarget, boolean forceDisp32) {
int shortSize = 2;
int longSize = 5;
long disp = jumpTarget - position();
if (!forceDisp32 && isByte(disp - shortSize)) {
emitByte(0xEB);
emitByte((int) ((disp - shortSize) & 0xFF));
} else {
emitByte(0xE9);
emitInt((int) (disp - longSize));
}
}
@Override
public final void jmp(Label l) {
if (l.isBound()) {
jmp(l.position(), false);
} else {
// By default, forward jumps are always 32-bit displacements, since
// we can't yet know where the label will be bound. If you're sure that
// the forward jump will stay within the 127-byte reach of an 8-bit
// displacement, use jmpb to force the short form.
l.addPatchAt(position());
emitByte(0xE9);
emitInt(0);
}
}
public final void jmp(Register entry) {
int encode = prefixAndEncode(entry.encoding);
emitByte(0xFF);
emitByte(0xE0 | encode);
}
public final void jmp(AMD64Address adr) {
prefix(adr);
emitByte(0xFF);
emitOperandHelper(rsp, adr, 0);
}
public final void jmpb(Label l) {
if (l.isBound()) {
int shortSize = 2;
int entry = l.position();
assert isByte((entry - position()) - shortSize) : "Displacement too large for a short jmp";
long offs = entry - position();
emitByte(0xEB);
emitByte((int) ((offs - shortSize) & 0xFF));
} else {
l.addPatchAt(position());
emitByte(0xEB);
emitByte(0);
}
}
// This instruction sets ZF if the OR of the two mask operands is all zeros, and CF if it is all ones
public final void kortestql(Register src1, Register src2) {
assert supports(CPUFeature.AVX512BW);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false, target);
int encode = vexPrefixAndEncode(src1, Register.None, src2, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x98);
emitByte(0xC0 | encode);
}
public final void kmovql(Register dst, Register src) {
assert supports(CPUFeature.AVX512BW);
if (src.getRegisterCategory().equals(AMD64.MASK)) {
// kmovql(KRegister dst, KRegister src)
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false, target);
int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x90);
emitByte(0xC0 | encode);
} else {
// kmovql(KRegister dst, Register src)
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false, target);
int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x92);
emitByte(0xC0 | encode);
}
}
public final void lead(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x8D);
emitOperandHelper(dst, src, 0);
}
public final void leaq(Register dst, AMD64Address src) {
prefixq(src, dst);
emitByte(0x8D);
emitOperandHelper(dst, src, 0);
}
public final void leave() {
emitByte(0xC9);
}
public final void lock() {
emitByte(0xF0);
}
public final void movapd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x28);
emitByte(0xC0 | encode);
}
public final void movaps(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x28);
emitByte(0xC0 | encode);
}
public final void movb(AMD64Address dst, int imm8) {
prefix(dst);
emitByte(0xC6);
emitOperandHelper(0, dst, 1);
emitByte(imm8);
}
public final void movb(AMD64Address dst, Register src) {
assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
prefixb(dst, src);
emitByte(0x88);
emitOperandHelper(src, dst, 0);
}
public final void movl(Register dst, int imm32) {
int encode = prefixAndEncode(dst.encoding);
emitByte(0xB8 | encode);
emitInt(imm32);
}
public final void movl(Register dst, Register src) {
int encode = prefixAndEncode(dst.encoding, src.encoding);
emitByte(0x8B);
emitByte(0xC0 | encode);
}
public final void movl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x8B);
emitOperandHelper(dst, src, 0);
}
/**
* @param wide use 4 byte encoding for displacements that would normally fit in a byte
*/
public final void movl(Register dst, AMD64Address src, boolean wide) {
prefix(src, dst);
emitByte(0x8B);
emitOperandHelper(dst, src, wide, 0);
}
public final void movl(AMD64Address dst, int imm32) {
prefix(dst);
emitByte(0xC7);
emitOperandHelper(0, dst, 4);
emitInt(imm32);
}
public final void movl(AMD64Address dst, Register src) {
prefix(dst, src);
emitByte(0x89);
emitOperandHelper(src, dst, 0);
}
/**
 * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
 * from memory. On old Opterons, however, use movlpd instead of movsd. The selection is done in
* {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
* {@link AMD64MacroAssembler#movflt(Register, Register)}.
*/
public final void movlpd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x12);
emitOperandHelper(dst, src, 0);
}
public final void movlhps(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, src, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x16);
emitByte(0xC0 | encode);
}
public final void movq(Register dst, AMD64Address src) {
movq(dst, src, false);
}
public final void movq(Register dst, AMD64Address src, boolean wide) {
if (dst.getRegisterCategory().equals(AMD64.XMM)) {
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ wide, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x7E);
emitOperandHelper(dst, src, wide, 0);
} else {
// gpr version of movq
prefixq(src, dst);
emitByte(0x8B);
emitOperandHelper(dst, src, wide, 0);
}
}
public final void movq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding, src.encoding);
emitByte(0x8B);
emitByte(0xC0 | encode);
}
public final void movq(AMD64Address dst, Register src) {
if (src.getRegisterCategory().equals(AMD64.XMM)) {
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xD6);
emitOperandHelper(src, dst, 0);
} else {
// gpr version of movq
prefixq(dst, src);
emitByte(0x89);
emitOperandHelper(src, dst, 0);
}
}
public final void movsbl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x0F);
emitByte(0xBE);
emitOperandHelper(dst, src, 0);
}
public final void movsbl(Register dst, Register src) {
int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
emitByte(0x0F);
emitByte(0xBE);
emitByte(0xC0 | encode);
}
public final void movsbq(Register dst, AMD64Address src) {
prefixq(src, dst);
emitByte(0x0F);
emitByte(0xBE);
emitOperandHelper(dst, src, 0);
}
public final void movsbq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding, src.encoding);
emitByte(0x0F);
emitByte(0xBE);
emitByte(0xC0 | encode);
}
public final void movsd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x10);
emitByte(0xC0 | encode);
}
public final void movsd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x10);
emitOperandHelper(dst, src, 0);
}
public final void movsd(AMD64Address dst, Register src) {
assert src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x11);
emitOperandHelper(src, dst, 0);
}
public final void movss(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x10);
emitByte(0xC0 | encode);
}
public final void movss(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x10);
emitOperandHelper(dst, src, 0);
}
public final void movss(AMD64Address dst, Register src) {
assert src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x11);
emitOperandHelper(src, dst, 0);
}
public final void mulpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x59);
emitByte(0xC0 | encode);
}
public final void mulpd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x59);
emitOperandHelper(dst, src, 0);
}
public final void mulsd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x59);
emitByte(0xC0 | encode);
}
public final void mulsd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x59);
emitOperandHelper(dst, src, 0);
}
public final void mulss(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x59);
emitByte(0xC0 | encode);
}
public final void movswl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x0F);
emitByte(0xBF);
emitOperandHelper(dst, src, 0);
}
public final void movw(AMD64Address dst, int imm16) {
emitByte(0x66); // switch to 16-bit mode
prefix(dst);
emitByte(0xC7);
emitOperandHelper(0, dst, 2);
emitShort(imm16);
}
public final void movw(AMD64Address dst, Register src) {
emitByte(0x66);
prefix(dst, src);
emitByte(0x89);
emitOperandHelper(src, dst, 0);
}
public final void movzbl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x0F);
emitByte(0xB6);
emitOperandHelper(dst, src, 0);
}
public final void movzbl(Register dst, Register src) {
AMD64RMOp.MOVZXB.emit(this, OperandSize.DWORD, dst, src);
}
public final void movzbq(Register dst, Register src) {
AMD64RMOp.MOVZXB.emit(this, OperandSize.QWORD, dst, src);
}
public final void movzwl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x0F);
emitByte(0xB7);
emitOperandHelper(dst, src, 0);
}
public final void negl(Register dst) {
NEG.emit(this, DWORD, dst);
}
public final void notl(Register dst) {
NOT.emit(this, DWORD, dst);
}
public final void notq(Register dst) {
NOT.emit(this, QWORD, dst);
}
@Override
public final void ensureUniquePC() {
nop();
}
public final void nop() {
nop(1);
}
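/**
 * Emits {@code count} bytes of nop padding. With {@code UseNormalNop} set, only single-byte
 * 0x90 nops are emitted; with {@code UseAddressNop}, multi-byte address nops are combined so
 * that the padding decodes in as few instructions as possible.
 */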
public void nop(int count) {
int i = count;
if (UseNormalNop) {
assert i > 0 : " ";
// The fancy nops aren't currently recognized by debuggers, making it a
// pain to disassemble code while debugging. If asserts are on, speed is
// clearly not an issue, so simply use the traditional single-byte nop
// for alignment.
for (; i > 0; i--) {
emitByte(0x90);
}
return;
}
if (UseAddressNop) {
//
// Using multi-byte nops "0x0F 0x1F [address]" for AMD.
// 1: 0x90
// 2: 0x66 0x90
// 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - we need patching-safe padding)
// 4: 0x0F 0x1F 0x40 0x00
// 5: 0x0F 0x1F 0x44 0x00 0x00
// 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
// 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
// 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// The remaining encodings are AMD-specific - use consecutive address nops:
// 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
// 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
// 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
// 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
// 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// Size prefixes (0x66) are added for larger sizes
while (i >= 22) {
i -= 11;
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
addrNop8();
}
// Generate the first nop for sizes between 12 and 21
switch (i) {
case 21:
i -= 11;
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
addrNop8();
break;
case 20:
case 19:
i -= 10;
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
addrNop8();
break;
case 18:
case 17:
i -= 9;
emitByte(0x66); // size prefix
addrNop8();
break;
case 16:
case 15:
i -= 8;
addrNop8();
break;
case 14:
case 13:
i -= 7;
addrNop7();
break;
case 12:
i -= 6;
emitByte(0x66); // size prefix
addrNop5();
break;
default:
assert i < 12;
}
// Generate the second nop for sizes between 1 and 11
switch (i) {
case 11:
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
addrNop8();
break;
case 10:
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
addrNop8();
break;
case 9:
emitByte(0x66); // size prefix
addrNop8();
break;
case 8:
addrNop8();
break;
case 7:
addrNop7();
break;
case 6:
emitByte(0x66); // size prefix
addrNop5();
break;
case 5:
addrNop5();
break;
case 4:
addrNop4();
break;
case 3:
// Don't use "0x0F 0x1F 0x00" - need patching safe padding
emitByte(0x66); // size prefix
emitByte(0x66); // size prefix
emitByte(0x90); // nop
break;
case 2:
emitByte(0x66); // size prefix
emitByte(0x90); // nop
break;
case 1:
emitByte(0x90); // nop
break;
default:
assert i == 0;
}
return;
}
// Using nops with size prefixes "0x66 0x90".
// From AMD Optimization Guide:
// 1: 0x90
// 2: 0x66 0x90
// 3: 0x66 0x66 0x90
// 4: 0x66 0x66 0x66 0x90
// 5: 0x66 0x66 0x90 0x66 0x90
// 6: 0x66 0x66 0x90 0x66 0x66 0x90
// 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
// 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
// 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
// 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
//
while (i > 12) {
i -= 4;
emitByte(0x66); // size prefix
emitByte(0x66);
emitByte(0x66);
emitByte(0x90); // nop
}
// 1 - 12 nops
if (i > 8) {
if (i > 9) {
i -= 1;
emitByte(0x66);
}
i -= 3;
emitByte(0x66);
emitByte(0x66);
emitByte(0x90);
}
// 1 - 8 nops
if (i > 4) {
if (i > 6) {
i -= 1;
emitByte(0x66);
}
i -= 3;
emitByte(0x66);
emitByte(0x66);
emitByte(0x90);
}
switch (i) {
case 4:
emitByte(0x66);
emitByte(0x66);
emitByte(0x66);
emitByte(0x90);
break;
case 3:
emitByte(0x66);
emitByte(0x66);
emitByte(0x90);
break;
case 2:
emitByte(0x66);
emitByte(0x90);
break;
case 1:
emitByte(0x90);
break;
default:
assert i == 0;
}
}
public final void orl(Register dst, Register src) {
OR.rmOp.emit(this, DWORD, dst, src);
}
public final void orl(Register dst, int imm32) {
OR.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void pop(Register dst) {
int encode = prefixAndEncode(dst.encoding);
emitByte(0x58 | encode);
}
public void popfq() {
emitByte(0x9D);
}
public final void ptest(Register dst, Register src) {
assert supports(CPUFeature.SSE4_1);
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
emitByte(0x17);
emitByte(0xC0 | encode);
}
public final void vptest(Register dst, Register src) {
assert supports(CPUFeature.AVX);
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = vexPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
emitByte(0x17);
emitByte(0xC0 | encode);
}
public final void pcmpestri(Register dst, AMD64Address src, int imm8) {
assert supports(CPUFeature.SSE4_2);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
emitByte(0x61);
emitOperandHelper(dst, src, 0);
emitByte(imm8);
}
public final void pcmpestri(Register dst, Register src, int imm8) {
assert supports(CPUFeature.SSE4_2);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_3A, attributes);
emitByte(0x61);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void pmovzxbw(Register dst, AMD64Address src) {
assert supports(CPUFeature.SSE4_1); // pmovzxbw is an SSE4.1 instruction
// XXX legacy_mode should be: _legacy_mode_bw
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_HVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
emitByte(0x30);
emitOperandHelper(dst, src, 0);
}
public final void vpmovzxbw(Register dst, AMD64Address src, int vectorLen) {
assert supports(CPUFeature.AVX);
// XXX legacy_mode should be: _legacy_mode_bw
AMD64InstructionAttr attributes = new AMD64InstructionAttr(vectorLen, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false, target);
attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_HVM, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_NObit);
vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F_38, attributes);
emitByte(0x30);
emitOperandHelper(dst, src, 0);
}
public final void push(Register src) {
int encode = prefixAndEncode(src.encoding);
emitByte(0x50 | encode);
}
public void pushfq() {
emitByte(0x9C);
}
public final void paddd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xFE);
emitByte(0xC0 | encode);
}
public final void paddq(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xD4);
emitByte(0xC0 | encode);
}
public final void pextrw(Register dst, Register src, int imm8) {
assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xC5);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void pinsrw(Register dst, Register src, int imm8) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xC4);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void por(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xEB);
emitByte(0xC0 | encode);
}
public final void pand(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xDB);
emitByte(0xC0 | encode);
}
public final void pxor(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xEF);
emitByte(0xC0 | encode);
}
public final void vpxor(Register dst, Register nds, Register src) {
assert supports(CPUFeature.AVX);
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = vexPrefixAndEncode(dst, nds, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xEF);
emitByte(0xC0 | encode);
}
public final void vpxor(Register dst, Register nds, AMD64Address src) {
assert supports(CPUFeature.AVX);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true, target);
attributes.setAddressAttributes(/* tuple_type */ EvexTupleType.EVEX_FV, /* input_size_in_bits */ EvexInputSizeInBits.EVEX_32bit);
vexPrefix(src, nds, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xEF);
emitOperandHelper(dst, src, 0);
}
public final void pslld(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
// XMM6 is for /6 encoding: 66 0F 72 /6 ib
int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x72);
emitByte(0xC0 | encode);
emitByte(imm8 & 0xFF);
}
public final void psllq(Register dst, Register shift) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && shift.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, shift, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xF3);
emitByte(0xC0 | encode);
}
public final void psllq(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
int encode = simdPrefixAndEncode(AMD64.xmm6, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x73);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void psrad(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
// XMM4 is for /4 encoding: 66 0F 72 /4 ib
int encode = simdPrefixAndEncode(AMD64.xmm4, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x72);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void psrld(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
// XMM2 is for /2 encoding: 66 0F 72 /2 ib
int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x72);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void psrlq(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
int encode = simdPrefixAndEncode(AMD64.xmm2, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x73);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void psrldq(Register dst, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
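// XMM3 is for /3 encoding: 66 0F 73 /3 ib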
int encode = simdPrefixAndEncode(AMD64.xmm3, dst, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x73);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void pshufd(Register dst, Register src, int imm8) {
assert isUByte(imm8) : "invalid value";
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x70);
emitByte(0xC0 | encode);
emitByte(imm8);
}
public final void psubd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xFA);
emitByte(0xC0 | encode);
}
public final void rcpps(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ true, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x53);
emitByte(0xC0 | encode);
}
public final void ret(int imm16) {
if (imm16 == 0) {
emitByte(0xC3);
} else {
emitByte(0xC2);
emitShort(imm16);
}
}
public final void sarl(Register dst, int imm8) {
int encode = prefixAndEncode(dst.encoding);
assert isShiftCount(imm8 >> 1) : "illegal shift count";
if (imm8 == 1) {
emitByte(0xD1);
emitByte(0xF8 | encode);
} else {
emitByte(0xC1);
emitByte(0xF8 | encode);
emitByte(imm8);
}
}
public final void shll(Register dst, int imm8) {
assert isShiftCount(imm8 >> 1) : "illegal shift count";
int encode = prefixAndEncode(dst.encoding);
if (imm8 == 1) {
emitByte(0xD1);
emitByte(0xE0 | encode);
} else {
emitByte(0xC1);
emitByte(0xE0 | encode);
emitByte(imm8);
}
}
public final void shll(Register dst) {
int encode = prefixAndEncode(dst.encoding);
emitByte(0xD3);
emitByte(0xE0 | encode);
}
public final void shrl(Register dst, int imm8) {
assert isShiftCount(imm8 >> 1) : "illegal shift count";
int encode = prefixAndEncode(dst.encoding);
emitByte(0xC1);
emitByte(0xE8 | encode);
emitByte(imm8);
}
public final void shrl(Register dst) {
int encode = prefixAndEncode(dst.encoding);
emitByte(0xD3);
emitByte(0xE8 | encode);
}
public final void subl(AMD64Address dst, int imm32) {
SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void subl(Register dst, int imm32) {
SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
}
public final void subl(Register dst, Register src) {
SUB.rmOp.emit(this, DWORD, dst, src);
}
public final void subpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x5C);
emitByte(0xC0 | encode);
}
public final void subsd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x5C);
emitByte(0xC0 | encode);
}
public final void subsd(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x5C);
emitOperandHelper(dst, src, 0);
}
public final void testl(Register dst, int imm32) {
// Not using emitArith because test
// doesn't support sign-extension of
// 8-bit operands.
int encode = dst.encoding;
if (encode == 0) {
emitByte(0xA9);
} else {
encode = prefixAndEncode(encode);
emitByte(0xF7);
emitByte(0xC0 | encode);
}
emitInt(imm32);
}
public final void testl(Register dst, Register src) {
int encode = prefixAndEncode(dst.encoding, src.encoding);
emitByte(0x85);
emitByte(0xC0 | encode);
}
public final void testl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x85);
emitOperandHelper(dst, src, 0);
}
public final void unpckhpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x15);
emitByte(0xC0 | encode);
}
public final void unpcklpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x14);
emitByte(0xC0 | encode);
}
public final void xorl(Register dst, Register src) {
XOR.rmOp.emit(this, DWORD, dst, src);
}
public final void xorpd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x57);
emitByte(0xC0 | encode);
}
public final void xorps(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x57);
emitByte(0xC0 | encode);
}
protected final void decl(Register dst) {
// Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
int encode = prefixAndEncode(dst.encoding);
emitByte(0xFF);
emitByte(0xC8 | encode);
}
protected final void incl(Register dst) {
// Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
int encode = prefixAndEncode(dst.encoding);
emitByte(0xFF);
emitByte(0xC0 | encode);
}
private int prefixAndEncode(int regEnc) {
return prefixAndEncode(regEnc, false);
}
private int prefixAndEncode(int regEnc, boolean byteinst) {
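// For byte instructions, register encodings 4-7 without a REX prefix select the
// legacy high-byte registers AH/CH/DH/BH; emitting a plain REX prefix makes the
// same encodings select SPL/BPL/SIL/DIL instead.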
if (regEnc >= 8) {
emitByte(Prefix.REXB);
return regEnc - 8;
} else if (byteinst && regEnc >= 4) {
emitByte(Prefix.REX);
}
return regEnc;
}
private int prefixqAndEncode(int regEnc) {
if (regEnc < 8) {
emitByte(Prefix.REXW);
return regEnc;
} else {
emitByte(Prefix.REXWB);
return regEnc - 8;
}
}
private int prefixAndEncode(int dstEnc, int srcEnc) {
return prefixAndEncode(dstEnc, false, srcEnc, false);
}
private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
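// Illustrative example: dstEncoding == 9 (r9) and srcEncoding == 2 (rdx) emits
// REX.R and returns (1 << 3) | 2 == 0x0A, the lower 6 bits of the ModRM byte.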
int srcEnc = srcEncoding;
int dstEnc = dstEncoding;
if (dstEnc < 8) {
if (srcEnc >= 8) {
emitByte(Prefix.REXB);
srcEnc -= 8;
} else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
emitByte(Prefix.REX);
}
} else {
if (srcEnc < 8) {
emitByte(Prefix.REXR);
} else {
emitByte(Prefix.REXRB);
srcEnc -= 8;
}
dstEnc -= 8;
}
return dstEnc << 3 | srcEnc;
}
/**
 * Emits the REX prefix and creates the encoding of the lower 6 bits of the ModRM byte. If a
 * given operand encoding exceeds 3 bits, the 4th bit is encoded in the prefix.
 *
 * @param regEncoding the encoding of the register part of the ModRM byte
 * @param rmEncoding the encoding of the r/m part of the ModRM byte
 * @return the lower 6 bits of the ModRM byte that should be emitted
 */
private int prefixqAndEncode(int regEncoding, int rmEncoding) {
int rmEnc = rmEncoding;
int regEnc = regEncoding;
if (regEnc < 8) {
if (rmEnc < 8) {
emitByte(Prefix.REXW);
} else {
emitByte(Prefix.REXWB);
rmEnc -= 8;
}
} else {
if (rmEnc < 8) {
emitByte(Prefix.REXWR);
} else {
emitByte(Prefix.REXWRB);
rmEnc -= 8;
}
regEnc -= 8;
}
return regEnc << 3 | rmEnc;
}
private void vexPrefix(int rxb, int ndsEncoding, int pre, int opc, AMD64InstructionAttr attributes) {
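// Illustrative example, assuming the HotSpot-style constant values (AVX_128bit == 0,
// VEX_SIMD_66 == 1): with rxb == 0, an invalid nds (ndsEncoding == 0) and vexW ==
// false, the 2-byte form is chosen, emitting 0xC5 followed by
// byte1 == 0x80 | (0xF << 3) | 1 == 0xF9.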
int vectorLen = attributes.getVectorLen();
boolean vexW = attributes.isRexVexW();
boolean isXorB = ((rxb & 0x3) > 0);
if (isXorB || vexW || (opc == VexOpcode.VEX_OPCODE_0F_38) || (opc == VexOpcode.VEX_OPCODE_0F_3A)) {
emitByte(Prefix.VEX_3BYTES);
int byte1 = (rxb << 5);
byte1 = ((~byte1) & 0xE0) | opc;
emitByte(byte1);
int byte2 = ((~ndsEncoding) & 0xf) << 3;
byte2 |= (vexW ? VexPrefix.VEX_W : 0) | ((vectorLen > 0) ? 4 : 0) | pre;
emitByte(byte2);
} else {
emitByte(Prefix.VEX_2BYTES);
int byte1 = ((rxb & 0x4) > 0) ? VexPrefix.VEX_R : 0;
byte1 = (~byte1) & 0x80;
byte1 |= ((~ndsEncoding) & 0xf) << 3;
byte1 |= ((vectorLen > 0) ? 4 : 0) | pre;
emitByte(byte1);
}
}
private void vexPrefix(AMD64Address adr, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
int rxb = getRXB(src, adr);
int ndsEncoding = nds.isValid() ? nds.encoding : 0;
vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
setCurAttributes(attributes);
}
private int vexPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
int rxb = getRXB(dst, src);
int ndsEncoding = nds.isValid() ? nds.encoding : 0;
vexPrefix(rxb, ndsEncoding, pre, opc, attributes);
// return modrm byte components for operands
return (((dst.encoding & 7) << 3) | (src.encoding & 7));
}
private void simdPrefix(Register xreg, Register nds, AMD64Address adr, int pre, int opc, AMD64InstructionAttr attributes) {
if (supports(CPUFeature.AVX)) {
vexPrefix(adr, nds, xreg, pre, opc, attributes);
} else {
switch (pre) {
case VexSimdPrefix.VEX_SIMD_66:
emitByte(0x66);
break;
case VexSimdPrefix.VEX_SIMD_F2:
emitByte(0xF2);
break;
case VexSimdPrefix.VEX_SIMD_F3:
emitByte(0xF3);
break;
}
if (attributes.isRexVexW()) {
prefixq(adr, xreg);
} else {
prefix(adr, xreg);
}
switch (opc) {
case VexOpcode.VEX_OPCODE_0F:
emitByte(0x0F);
break;
case VexOpcode.VEX_OPCODE_0F_38:
emitByte(0x0F);
emitByte(0x38);
break;
case VexOpcode.VEX_OPCODE_0F_3A:
emitByte(0x0F);
emitByte(0x3A);
break;
}
}
}
private int simdPrefixAndEncode(Register dst, Register nds, Register src, int pre, int opc, AMD64InstructionAttr attributes) {
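// Illustrative trace on a non-AVX target: movsd(xmm1, xmm2) reaches this method with
// pre == VEX_SIMD_F2 and opc == VEX_OPCODE_0F. It emits 0xF2, no REX (both encodings
// are below 8) and 0x0F, then returns (1 << 3) | 2 == 0x0A; the caller's 0x10 and
// 0xC0 | encode yield F2 0F 10 CA, i.e. movsd xmm1, xmm2.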
if (supports(CPUFeature.AVX)) {
return vexPrefixAndEncode(dst, nds, src, pre, opc, attributes);
} else {
switch (pre) {
case VexSimdPrefix.VEX_SIMD_66:
emitByte(0x66);
break;
case VexSimdPrefix.VEX_SIMD_F2:
emitByte(0xF2);
break;
case VexSimdPrefix.VEX_SIMD_F3:
emitByte(0xF3);
break;
}
int encode;
int dstEncoding = dst.encoding;
int srcEncoding = src.encoding;
if (attributes.isRexVexW()) {
encode = prefixqAndEncode(dstEncoding, srcEncoding);
} else {
encode = prefixAndEncode(dstEncoding, srcEncoding);
}
switch (opc) {
case VexOpcode.VEX_OPCODE_0F:
emitByte(0x0F);
break;
case VexOpcode.VEX_OPCODE_0F_38:
emitByte(0x0F);
emitByte(0x38);
break;
case VexOpcode.VEX_OPCODE_0F_3A:
emitByte(0x0F);
emitByte(0x3A);
break;
}
return encode;
}
}
private static boolean needsRex(Register reg) {
return reg.encoding >= MinEncodingNeedsRex;
}
private void prefix(AMD64Address adr) {
if (needsRex(adr.getBase())) {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXXB);
} else {
emitByte(Prefix.REXB);
}
} else {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXX);
}
}
}
private void prefixq(AMD64Address adr) {
if (needsRex(adr.getBase())) {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXWXB);
} else {
emitByte(Prefix.REXWB);
}
} else {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXWX);
} else {
emitByte(Prefix.REXW);
}
}
}
private void prefixb(AMD64Address adr, Register reg) {
prefix(adr, reg, true);
}
private void prefix(AMD64Address adr, Register reg) {
prefix(adr, reg, false);
}
private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
if (reg.encoding < 8) {
if (needsRex(adr.getBase())) {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXXB);
} else {
emitByte(Prefix.REXB);
}
} else {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXX);
} else if (byteinst && reg.encoding >= 4) {
emitByte(Prefix.REX);
}
}
} else {
if (needsRex(adr.getBase())) {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXRXB);
} else {
emitByte(Prefix.REXRB);
}
} else {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXRX);
} else {
emitByte(Prefix.REXR);
}
}
}
}
private void prefixq(AMD64Address adr, Register src) {
if (src.encoding < 8) {
if (needsRex(adr.getBase())) {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXWXB);
} else {
emitByte(Prefix.REXWB);
}
} else {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXWX);
} else {
emitByte(Prefix.REXW);
}
}
} else {
if (needsRex(adr.getBase())) {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXWRXB);
} else {
emitByte(Prefix.REXWRB);
}
} else {
if (needsRex(adr.getIndex())) {
emitByte(Prefix.REXWRX);
} else {
emitByte(Prefix.REXWR);
}
}
}
}
public final void addq(Register dst, int imm32) {
ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
}
public final void addq(AMD64Address dst, int imm32) {
ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
}
public final void addq(Register dst, Register src) {
ADD.rmOp.emit(this, QWORD, dst, src);
}
public final void addq(AMD64Address dst, Register src) {
ADD.mrOp.emit(this, QWORD, dst, src);
}
public final void andq(Register dst, int imm32) {
AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
}
public final void bsrq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding(), src.encoding());
emitByte(0x0F);
emitByte(0xBD);
emitByte(0xC0 | encode);
}
public final void bswapq(Register reg) {
int encode = prefixqAndEncode(reg.encoding);
emitByte(0x0F);
emitByte(0xC8 | encode);
}
public final void cdqq() {
emitByte(Prefix.REXW);
emitByte(0x99);
}
public final void cmovq(ConditionFlag cc, Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding, src.encoding);
emitByte(0x0F);
emitByte(0x40 | cc.getValue());
emitByte(0xC0 | encode);
}
public final void setb(ConditionFlag cc, Register dst) {
int encode = prefixAndEncode(dst.encoding, true);
emitByte(0x0F);
emitByte(0x90 | cc.getValue());
emitByte(0xC0 | encode);
}
public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
prefixq(src, dst);
emitByte(0x0F);
emitByte(0x40 | cc.getValue());
emitOperandHelper(dst, src, 0);
}
public final void cmpq(Register dst, int imm32) {
CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
}
public final void cmpq(Register dst, Register src) {
CMP.rmOp.emit(this, QWORD, dst, src);
}
public final void cmpq(Register dst, AMD64Address src) {
CMP.rmOp.emit(this, QWORD, dst, src);
}
public final void cmpxchgq(Register reg, AMD64Address adr) {
prefixq(adr, reg);
emitByte(0x0F);
emitByte(0xB1);
emitOperandHelper(reg, adr, 0);
}
public final void cvtdq2pd(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xE6);
emitByte(0xC0 | encode);
}
public final void cvtsi2sdq(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, dst, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x2A);
emitByte(0xC0 | encode);
}
public final void cvttsd2siq(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.CPU) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x2C);
emitByte(0xC0 | encode);
}
public final void cvttpd2dq(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0xE6);
emitByte(0xC0 | encode);
}
protected final void decq(Register dst) {
// Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xFF);
emitByte(0xC8 | encode);
}
public final void decq(AMD64Address dst) {
DEC.emit(this, QWORD, dst);
}
public final void imulq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding, src.encoding);
emitByte(0x0F);
emitByte(0xAF);
emitByte(0xC0 | encode);
}
public final void incq(Register dst) {
// Don't use this directly; use AMD64MacroAssembler.incrementq() instead.
// Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xFF);
emitByte(0xC0 | encode);
}
public final void incq(AMD64Address dst) {
INC.emit(this, QWORD, dst);
}
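/**
 * Emits the 10-byte {@code REX.W + B8+rd io} (movabs) form, loading a full 64-bit immediate
 * into {@code dst}.
 */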
public final void movq(Register dst, long imm64) {
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xB8 | encode);
emitLong(imm64);
}
public final void movslq(Register dst, int imm32) {
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xC7);
emitByte(0xC0 | encode);
emitInt(imm32);
}
public final void movdq(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6E);
emitOperandHelper(dst, src, 0);
}
public final void movdq(AMD64Address dst, Register src) {
assert src.getRegisterCategory().equals(AMD64.XMM);
// swap src/dst to get correct prefix
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x7E);
emitOperandHelper(src, dst, 0);
}
public final void movdq(Register dst, Register src) {
if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6E);
emitByte(0xC0 | encode);
} else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
// swap src/dst to get correct prefix
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ true, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x7E);
emitByte(0xC0 | encode);
} else {
throw new InternalError("should not reach here");
}
}
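/**
 * MOVD (66 0F 6E / 0F 7E): move the low 32 bits between an XMM register and a
 * general-purpose register; the upper bits of an XMM destination are zeroed.
 */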
public final void movdl(Register dst, Register src) {
if (dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.CPU)) {
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6E);
emitByte(0xC0 | encode);
} else if (src.getRegisterCategory().equals(AMD64.XMM) && dst.getRegisterCategory().equals(AMD64.CPU)) {
// swap src/dst to get correct prefix
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x7E);
emitByte(0xC0 | encode);
} else {
throw new InternalError("should not reach here");
}
}
public final void movdl(Register dst, AMD64Address src) {
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_66, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6E);
emitOperandHelper(dst, src, 0);
}
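/**
 * MOVDDUP (F2 0F 12, SSE3): duplicate the low double of {@code src} into both halves of
 * {@code dst}.
 */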
public final void movddup(Register dst, Register src) {
assert supports(CPUFeature.SSE3);
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F2, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x12);
emitByte(0xC0 | encode);
}
public final void movdqu(Register dst, AMD64Address src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
simdPrefix(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6F);
emitOperandHelper(dst, src, 0);
}
public final void movdqu(Register dst, Register src) {
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
int encode = simdPrefixAndEncode(dst, Register.None, src, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6F);
emitByte(0xC0 | encode);
}
public final void vmovdqu(Register dst, AMD64Address src) {
assert supports(CPUFeature.AVX);
assert dst.getRegisterCategory().equals(AMD64.XMM);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_256bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
vexPrefix(src, Register.None, dst, VexSimdPrefix.VEX_SIMD_F3, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x6F);
emitOperandHelper(dst, src, 0);
}
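/**
 * VZEROUPPER (VEX.128 0F 77): zero the upper 128 bits of all YMM registers. Emitting this
 * before running SSE code avoids the AVX-SSE transition penalty.
 */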
public final void vzeroupper() {
assert supports(CPUFeature.AVX);
AMD64InstructionAttr attributes = new AMD64InstructionAttr(AvxVectorLen.AVX_128bit, /* rexVexW */ false, /* legacyMode */ false, /* noMaskReg */ false, /* usesVl */ false, target);
vexPrefixAndEncode(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, VexSimdPrefix.VEX_SIMD_NONE, VexOpcode.VEX_OPCODE_0F, attributes);
emitByte(0x77);
}
public final void movslq(AMD64Address dst, int imm32) {
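// As with movslq(Register, int), this emits MOV r/m64, imm32 (sign-extended), not MOVSXD.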
prefixq(dst);
emitByte(0xC7);
emitOperandHelper(0, dst, 4);
emitInt(imm32);
}
public final void movslq(Register dst, AMD64Address src) {
prefixq(src, dst);
emitByte(0x63);
emitOperandHelper(dst, src, 0);
}
public final void movslq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding, src.encoding);
emitByte(0x63);
emitByte(0xC0 | encode);
}
public final void negq(Register dst) {
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xF7);
emitByte(0xD8 | encode);
}
public final void orq(Register dst, Register src) {
OR.rmOp.emit(this, QWORD, dst, src);
}
public final void shlq(Register dst, int imm8) {
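// isShiftCount checks the 32-bit range 0..31, so testing imm8 >> 1 admits the full
// 64-bit shift range 0..63.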
assert isShiftCount(imm8 >> 1) : "illegal shift count";
int encode = prefixqAndEncode(dst.encoding);
if (imm8 == 1) {
emitByte(0xD1);
emitByte(0xE0 | encode);
} else {
emitByte(0xC1);
emitByte(0xE0 | encode);
emitByte(imm8);
}
}
public final void shlq(Register dst) {
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xD3);
emitByte(0xE0 | encode);
}
public final void shrq(Register dst, int imm8) {
assert isShiftCount(imm8 >> 1) : "illegal shift count";
int encode = prefixqAndEncode(dst.encoding);
if (imm8 == 1) {
emitByte(0xD1);
emitByte(0xE8 | encode);
} else {
emitByte(0xC1);
emitByte(0xE8 | encode);
emitByte(imm8);
}
}
public final void shrq(Register dst) {
int encode = prefixqAndEncode(dst.encoding);
emitByte(0xD3);
emitByte(0xE8 | encode);
}
public final void sbbq(Register dst, Register src) {
SBB.rmOp.emit(this, QWORD, dst, src);
}
public final void subq(Register dst, int imm32) {
SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
}
public final void subq(AMD64Address dst, int imm32) {
SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
}
public final void subqWide(Register dst, int imm32) {
// Don't use the sign-extending version; force a 32-bit immediate.
SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
}
public final void subq(Register dst, Register src) {
SUB.rmOp.emit(this, QWORD, dst, src);
}
public final void testq(Register dst, Register src) {
int encode = prefixqAndEncode(dst.encoding, src.encoding);
emitByte(0x85);
emitByte(0xC0 | encode);
}
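/**
 * BTR r64, imm8 (REX.W 0F BA /6): copy bit {@code imm8} of {@code src} into CF and clear it
 * in the register.
 */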
public final void btrq(Register src, int imm8) {
int encode = prefixqAndEncode(src.encoding);
emitByte(0x0F);
emitByte(0xBA);
emitByte(0xF0 | encode);
emitByte(imm8);
}
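/**
 * XADD (0F C0 / 0F C1): exchange {@code src} with the memory operand and store the sum in
 * memory. Preceded by {@link #lock()}, this yields an atomic fetch-and-add.
 */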
public final void xaddb(AMD64Address dst, Register src) {
prefixb(dst, src);
emitByte(0x0F);
emitByte(0xC0);
emitOperandHelper(src, dst, 0);
}
public final void xaddw(AMD64Address dst, Register src) {
emitByte(0x66); // Operand-size override prefix selects 16-bit operands.
prefix(dst, src);
emitByte(0x0F);
emitByte(0xC1);
emitOperandHelper(src, dst, 0);
}
public final void xaddl(AMD64Address dst, Register src) {
prefix(dst, src);
emitByte(0x0F);
emitByte(0xC1);
emitOperandHelper(src, dst, 0);
}
public final void xaddq(AMD64Address dst, Register src) {
prefixq(dst, src);
emitByte(0x0F);
emitByte(0xC1);
emitOperandHelper(src, dst, 0);
}
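/**
 * XCHG (86 / 87): exchange a register with a memory operand. With a memory operand the
 * processor asserts the bus lock implicitly, so no LOCK prefix is needed.
 */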
public final void xchgb(Register dst, AMD64Address src) {
prefixb(src, dst);
emitByte(0x86);
emitOperandHelper(dst, src, 0);
}
public final void xchgw(Register dst, AMD64Address src) {
emitByte(0x66); // Operand-size override prefix selects 16-bit operands.
prefix(src, dst);
emitByte(0x87);
emitOperandHelper(dst, src, 0);
}
public final void xchgl(Register dst, AMD64Address src) {
prefix(src, dst);
emitByte(0x87);
emitOperandHelper(dst, src, 0);
}
public final void xchgq(Register dst, AMD64Address src) {
prefixq(src, dst);
emitByte(0x87);
emitOperandHelper(dst, src, 0);
}
public final void membar(int barriers) {
if (target.isMP) {
// We only have to handle StoreLoad.
if ((barriers & STORE_LOAD) != 0) {
// All usable chips support "locked" instructions, which suffice as barriers and are
// much faster than the alternative of using the cpuid instruction. Here we use a
// locked add of 0 to [rsp], which is conveniently a no-op otherwise, except that it
// clobbers the flags.
// Any change to this code may need to revisit other places where this idiom is used,
// in particular the orderAccess code.
lock();
addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
}
}
}
@Override
protected final void patchJumpTarget(int branch, int branchTarget) {
int op = getByte(branch);
assert op == 0xE8 // call
|| op == 0x00 // jump table entry
|| op == 0xE9 // jmp
|| op == 0xEB // short jmp
|| (op & 0xF0) == 0x70 // short jcc
|| (op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80) // jcc
: "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
if (op == 0x00) {
int offsetToJumpTableBase = getShort(branch + 1);
int jumpTableBase = branch - offsetToJumpTableBase;
int imm32 = branchTarget - jumpTableBase;
emitInt(imm32, branch);
} else if (op == 0xEB || (op & 0xF0) == 0x70) {
// short offset operators (jmp and jcc)
final int imm8 = branchTarget - (branch + 2);
/*
* A wrongly patched short branch can produce code that runs but misbehaves badly, so we
* always fail with an exception here instead of relying on an assert.
*/
if (!NumUtil.isByte(imm8)) {
throw new InternalError("branch displacement out of range: " + imm8);
}
emitByte(imm8, branch + 1);
} else {
int off = 1;
if (op == 0x0F) {
off = 2;
}
int imm32 = branchTarget - (branch + 4 + off);
emitInt(imm32, branch + off);
}
}
public void nullCheck(AMD64Address address) {
testl(AMD64.rax, address);
}
@Override
public void align(int modulus) {
if (position() % modulus != 0) {
nop(modulus - (position() % modulus));
}
}
/**
* Emits a direct call instruction. The actual call target is not specified, because all
* calls need patching anyway. Therefore, 0 is emitted as the call target, and the caller is
* responsible for adding the call address to the appropriate patching tables.
*/
public final void call() {
if (codePatchingAnnotationConsumer != null) {
int pos = position();
codePatchingAnnotationConsumer.accept(new ImmediateOperandAnnotation(pos, pos + 1, 4, pos + 5));
}
emitByte(0xE8);
emitInt(0);
}
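/**
 * CALL r64 (FF /2): indirect call through the address in {@code src}.
 */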
public final void call(Register src) {
int encode = prefixAndEncode(src.encoding);
emitByte(0xFF);
emitByte(0xD0 | encode);
}
public final void int3() {
emitByte(0xCC);
}
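/**
 * PAUSE (F3 90): spin-wait loop hint; reduces power use and memory-order mis-speculation
 * penalties when leaving the loop.
 */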
public final void pause() {
emitByte(0xF3);
emitByte(0x90);
}
private void emitx87(int b1, int b2, int i) {
assert 0 <= i && i < 8 : "illegal stack offset";
emitByte(b1);
emitByte(b2 + i);
}
public final void fldd(AMD64Address src) {
emitByte(0xDD);
emitOperandHelper(0, src, 0);
}
public final void flds(AMD64Address src) {
emitByte(0xD9);
emitOperandHelper(0, src, 0);
}
public final void fldln2() {
emitByte(0xD9);
emitByte(0xED);
}
public final void fldlg2() {
emitByte(0xD9);
emitByte(0xEC);
}
public final void fyl2x() {
emitByte(0xD9);
emitByte(0xF1);
}
public final void fstps(AMD64Address src) {
emitByte(0xD9);
emitOperandHelper(3, src, 0);
}
public final void fstpd(AMD64Address src) {
emitByte(0xDD);
emitOperandHelper(3, src, 0);
}
private void emitFPUArith(int b1, int b2, int i) {
assert 0 <= i && i < 8 : "illegal FPU register: " + i;
emitByte(b1);
emitByte(b2 + i);
}
public void ffree(int i) {
emitFPUArith(0xDD, 0xC0, i);
}
public void fincstp() {
emitByte(0xD9);
emitByte(0xF7);
}
public void fxch(int i) {
emitFPUArith(0xD9, 0xC8, i);
}
public void fnstswAX() {
emitByte(0xDF);
emitByte(0xE0);
}
public void fwait() {
emitByte(0x9B);
}
public void fprem() {
emitByte(0xD9);
emitByte(0xF8);
}
public final void fsin() {
emitByte(0xD9);
emitByte(0xFE);
}
public final void fcos() {
emitByte(0xD9);
emitByte(0xFF);
}
public final void fptan() {
emitByte(0xD9);
emitByte(0xF2);
}
public final void fstp(int i) {
emitx87(0xDD, 0xD8, i);
}
@Override
public AMD64Address makeAddress(Register base, int displacement) {
return new AMD64Address(base, displacement);
}
@Override
public AMD64Address getPlaceholder(int instructionStartPosition) {
return new AMD64Address(rip, Register.None, Scale.Times1, 0, instructionStartPosition);
}
private void prefetchPrefix(AMD64Address src) {
prefix(src);
emitByte(0x0F);
}
public void prefetchnta(AMD64Address src) {
prefetchPrefix(src);
emitByte(0x18);
emitOperandHelper(0, src, 0);
}
void prefetchr(AMD64Address src) {
assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
prefetchPrefix(src);
emitByte(0x0D);
emitOperandHelper(0, src, 0);
}
public void prefetcht0(AMD64Address src) {
assert supports(CPUFeature.SSE);
prefetchPrefix(src);
emitByte(0x18);
emitOperandHelper(1, src, 0);
}
public void prefetcht1(AMD64Address src) {
assert supports(CPUFeature.SSE);
prefetchPrefix(src);
emitByte(0x18);
emitOperandHelper(2, src, 0);
}
public void prefetcht2(AMD64Address src) {
assert supports(CPUFeature.SSE);
prefetchPrefix(src);
emitByte(0x18);
emitOperandHelper(3, src, 0);
}
public void prefetchw(AMD64Address src) {
assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
prefetchPrefix(src);
emitByte(0x0D);
emitOperandHelper(1, src, 0);
}
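/**
 * RDTSC (0F 31): read the time-stamp counter into EDX:EAX (the upper halves of RDX and RAX
 * are cleared in 64-bit mode).
 */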
public void rdtsc() {
emitByte(0x0F);
emitByte(0x31);
}
/**
* Emits an instruction which is considered to be illegal (UD2, 0F 0B). This is used if we
* deliberately want to crash the program (debugging etc.).
*/
public void illegal() {
emitByte(0x0F);
emitByte(0x0B);
}
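/**
 * LFENCE (0F AE E8): serialize all preceding load instructions.
 */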
public void lfence() {
emitByte(0x0F);
emitByte(0xAE);
emitByte(0xE8);
}
}