blob: 336ecbf78580de582a70cc0d97495d8c629f830d [file] [log] [blame]
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "assembler_x86_64.h"
#include "base/casts.h"
#include "base/memory_region.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"
namespace art {
namespace x86_64 {
// Streams a CpuRegister by printing the value returned by AsRegister().
std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
  const auto raw_register = reg.AsRegister();
  return os << raw_register;
}
// Streams an XmmRegister by printing the value returned by AsFloatRegister().
std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
  const auto raw_register = reg.AsFloatRegister();
  return os << raw_register;
}
std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
return os << "ST" << static_cast<int>(reg);
}
// Pretty-prints an Address in AT&T-like syntax: "disp(%base,%index,scale)".
// The form is chosen from the operand's mod field, and from whether rm is RSP
// with an index register other than RSP (the base/index/scale form).
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  // True when the base/index/scale form must be printed. The index is only
  // inspected when rm is RSP, exactly as the short-circuit requires.
  const auto uses_index = [&addr]() {
    return addr.rm() == RSP && addr.cpu_index().AsRegister() != RSP;
  };
  // Prints "(%rm)".
  const auto print_rm = [&os, &addr]() -> std::ostream& {
    return os << "(%" << addr.cpu_rm() << ")";
  };
  // Prints "(%base,%index,scale)".
  const auto print_base_index_scale = [&os, &addr]() -> std::ostream& {
    return os << "(%" << addr.cpu_base() << ",%"
              << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
  };

  switch (addr.mod()) {
    case 0: {
      // No displacement, except for the RBP-base form which carries a disp32.
      if (!uses_index()) {
        return print_rm();
      }
      if (addr.base() == RBP) {
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      return print_base_index_scale();
    }
    case 1: {
      // 8-bit displacement.
      const bool indexed = uses_index();
      os << static_cast<int>(addr.disp8());
      return indexed ? print_base_index_scale() : print_rm();
    }
    case 2: {
      // 32-bit displacement.
      const bool indexed = uses_index();
      os << static_cast<int>(addr.disp32());
      return indexed ? print_base_index_scale() : print_rm();
    }
    default:
      return os << "<address?>";
  }
}
// Returns true when either the AVX or the AVX2 feature flag of this assembler is set.
bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
  return has_AVX_ || has_AVX2_;
}
// Indirect call through a register: optional REX prefix, opcode 0xFF,
// register operand with opcode extension 2.
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  constexpr int kCallExtension = 2;
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(kCallExtension, reg.LowBits());
}
// Indirect call through memory: optional REX prefix, opcode 0xFF,
// memory operand with opcode extension 2.
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  constexpr int kCallExtension = 2;
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(kCallExtension, address);
}
// Call to a label: opcode 0xE8 followed by the label reference.
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // Total size of the instruction, and the part of it already written.
  constexpr int kInstructionSize = 5;
  constexpr int kOpcodeSize = 1;
  EmitUint8(0xE8);
  // The opcode has been emitted already, so only the remainder is passed on.
  EmitLabel(label, kInstructionSize - kOpcodeSize);
}
// Push a register: optional REX prefix, then opcode 0x50 plus the low bits of `reg`.
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}
// Push a memory operand: optional REX prefix, opcode 0xFF, operand with opcode extension 6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  constexpr int kPushExtension = 6;
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(kPushExtension, address);
}
// Push an immediate. An 8-bit value uses the short form (0x6A + one byte);
// anything else uses 0x68 followed by the immediate.
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (!imm.is_int8()) {
    EmitUint8(0x68);
    EmitImmediate(imm);
    return;
  }
  EmitUint8(0x6A);
  EmitUint8(imm.value() & 0xFF);
}
// Pop into a register: optional REX prefix, then opcode 0x58 plus the low bits of `reg`.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}
// Pop into memory: optional REX prefix, opcode 0x8F, operand with opcode extension 0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  constexpr int kPopExtension = 0;
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(kPopExtension, address);
}
// Move an immediate into a 64-bit register. A value that fits in 32 bits uses
// the 0xC7 form with a 32-bit immediate; otherwise the 0xB8+r form with a full
// 64-bit immediate is used. Both forms start with a REX64 prefix for `dst`.
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  const bool fits_in_32_bits = imm.is_int32();
  EmitRex64(dst);
  if (!fits_in_32_bits) {
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
    return;
  }
  // 32 bit. Note: sign-extends.
  EmitUint8(0xC7);
  EmitRegisterOperand(0, dst.LowBits());
  EmitInt32(static_cast<int32_t>(imm.value()));
}
// Move a 32-bit immediate into a register: optional REX prefix, opcode 0xB8
// plus the low bits of `dst`, then the immediate. The immediate must fit in 32 bits.
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
// Store an immediate to a 64-bit memory location: REX64 prefix, opcode 0xC7,
// operand with opcode extension 0, then the immediate. The immediate must fit in 32 bits.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  constexpr int kMovExtension = 0;
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(kMovExtension, dst);
  EmitImmediate(imm);
}
// 64-bit register-to-register move using opcode 0x89.
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg, so the
  // arguments to EmitRex64 (and to the operand emitter) are passed reversed.
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
// 32-bit register-to-register move: optional REX prefix, opcode 0x8B, register operand.
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// 64-bit load from memory: REX64 prefix, opcode 0x8B, memory operand.
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
// 32-bit load from memory: optional REX prefix, opcode 0x8B, memory operand.
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
// 64-bit store to memory: REX64 prefix, opcode 0x89, memory operand.
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
// 32-bit store to memory: optional REX prefix, opcode 0x89, memory operand.
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
// Store an immediate to a 32-bit memory location: optional REX prefix,
// opcode 0xC7, operand with opcode extension 0, then the immediate.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  constexpr int kMovExtension = 0;
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(kMovExtension, dst);
  EmitImmediate(imm);
}
// 32-bit movnt store: optional REX prefix, opcode bytes 0x0F 0xC3, memory operand.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(src, dst);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
// 64-bit movnt store: REX64 prefix, opcode bytes 0x0F 0xC3, memory operand.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitRex64(src, dst);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
// Conditional register move; forwards to the four-argument overload with is64bit = true.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  constexpr bool kIs64Bit = true;
  cmov(c, dst, src, kIs64Bit);
}
// Conditional register move: optional REX prefix (parameterised by `is64bit`
// and by whether each register needs REX), opcode bytes 0x0F and 0x40 + condition,
// then the register operand.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  const bool dst_needs_rex = dst.NeedsRex();
  const bool src_needs_rex = src.NeedsRex();
  EmitOptionalRex(false, is64bit, dst_needs_rex, false, src_needs_rex);
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// Conditional move from memory: REX64 or optional REX32 prefix depending on
// `is64bit`, opcode bytes 0x0F and 0x40 + condition, then the memory operand.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    EmitRex64(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitOperand(dst.LowBits(), src);
}
// Zero-extending byte move, register source: byte-register-normalizing REX
// prefix, opcode bytes 0x0F 0xB6, register operand.
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// Zero-extending byte load: optional REX prefix, opcode bytes 0x0F 0xB6, memory operand.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // A byte register only occurs in the register-source form, so the plain
  // prefix helper is used here rather than
  // EmitOptionalByteRegNormalizingRex32(dst, src).
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
// Sign-extending byte move, register source: byte-register-normalizing REX
// prefix, opcode bytes 0x0F 0xBE, register operand.
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// Sign-extending byte load: optional REX prefix, opcode bytes 0x0F 0xBE, memory operand.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // A byte register only occurs in the register-source form, so the plain
  // prefix helper is used here rather than
  // EmitOptionalByteRegNormalizingRex32(dst, src).
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
// Intentionally unsupported: a byte load into a register must state its
// extension explicitly, so this overload only logs a fatal error.
void X86_64Assembler::movb(CpuRegister /* dst */, const Address& /* src */) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
// Byte store from a register: byte-register-normalizing REX prefix, opcode 0x88,
// memory operand.
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
// Byte store of an immediate: optional REX prefix, opcode 0xC6, memory operand
// (with Register::RAX in the reg field), then the low byte of the immediate.
// The immediate must fit in 8 bits.
void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  EmitOperand(Register::RAX, dst);
  // Validated just before the immediate byte is written.
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
// Zero-extending 16-bit move, register source: optional REX prefix,
// opcode bytes 0x0F 0xB7, register operand.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// Zero-extending 16-bit load: optional REX prefix, opcode bytes 0x0F 0xB7, memory operand.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
// Sign-extending 16-bit move, register source: optional REX prefix,
// opcode bytes 0x0F 0xBF, register operand.
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// Sign-extending 16-bit load: optional REX prefix, opcode bytes 0x0F 0xBF, memory operand.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
// Intentionally unsupported: a 16-bit load into a register must state its
// extension explicitly, so this overload only logs a fatal error.
void X86_64Assembler::movw(CpuRegister /* dst */, const Address& /* src */) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
// 16-bit store from a register: operand-size override, optional REX prefix,
// opcode 0x89, memory operand.
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // The override is written first, ahead of any REX prefix.
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
// 16-bit store of an immediate: operand-size override, optional REX prefix,
// opcode 0xC7, memory operand (with Register::RAX in the reg field), then the
// immediate as two bytes, low byte first. The immediate must fit in 16 bits
// (signed or unsigned).
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(Register::RAX, dst);
  // Validated just before the immediate bytes are written.
  CHECK(imm.is_uint16() || imm.is_int16());
  EmitUint8(imm.value() & 0xFF);  // Low byte.
  EmitUint8(imm.value() >> 8);    // High byte.
}
// 64-bit load effective address: REX64 prefix, opcode 0x8D, memory operand.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
// 32-bit load effective address: optional REX prefix, opcode 0x8D, memory operand.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
// MOVAPS xmm, xmm. Delegates to the VEX-encoded vmovaps when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: optional REX prefix, 0x0F 0x28,
// register operand.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitOptionalRex32(dst, src);
    EmitUint8(0x0F);
    EmitUint8(0x28);
    EmitXmmRegisterOperand(dst.LowBits(), src);
  } else {
    vmovaps(dst, src);
  }
}
/**
 * VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2
 *
 * The two-byte VEX prefix is used unless both registers need REX. In the
 * two-byte form, when `dst` does not need REX, the store opcode (0x29) is
 * emitted with the operands swapped.
 */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  const bool load = dst.NeedsRex();
  const bool store = !load;
  const bool use_two_byte_vex = !(src.NeedsRex() && dst.NeedsRex());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  X86_64ManagedRegister no_vvvv = ManagedRegister::NoRegister().AsX86_64();
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;  // Only meaningful (and only emitted) in the three-byte form.
  if (use_two_byte_vex) {
    bool rex_bit;
    if (load) {
      rex_bit = dst.NeedsRex();
    } else {
      rex_bit = src.NeedsRex();
    }
    vex_byte_one = EmitVexPrefixByteOne(rex_bit, no_vvvv, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        /*X=*/ false,
                                        src.NeedsRex(),
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode followed by Instruction Operands
  if (use_two_byte_vex && store) {
    EmitUint8(0x29);
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitUint8(0x28);
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
// MOVAPS xmm, m128. Delegates to the VEX-encoded vmovaps when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: optional REX prefix, 0x0F 0x28,
// memory operand.
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitOptionalRex32(dst, src);
    EmitUint8(0x0F);
    EmitUint8(0x28);
    EmitOperand(dst.LowBits(), src);
  } else {
    vmovaps(dst, src);
  }
}
/**
 * VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128
 *
 * The two-byte VEX prefix is used when the address needs neither REX.X nor
 * REX.B; otherwise the three-byte prefix carries those bits.
 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);

  // Instruction VEX Prefix
  const uint8_t address_rex = src.rex();
  const bool rex_x = address_rex & GET_REX_X;
  const bool rex_b = address_rex & GET_REX_B;
  const bool use_two_byte_vex = !rex_b && !rex_x;
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;  // Only meaningful (and only emitted) in the three-byte form.
  if (use_two_byte_vex) {
    X86_64ManagedRegister no_vvvv = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), no_vvvv, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), rex_x, rex_b, SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
// MOVUPS xmm, m128. Delegates to the VEX-encoded vmovups when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: optional REX prefix, 0x0F 0x10,
// memory operand.
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitOptionalRex32(dst, src);
    EmitUint8(0x0F);
    EmitUint8(0x10);
    EmitOperand(dst.LowBits(), src);
  } else {
    vmovups(dst, src);
  }
}
/**
 * VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128
 *
 * The two-byte VEX prefix is used when the address needs neither REX.X nor
 * REX.B; otherwise the three-byte prefix carries those bits.
 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);

  // Instruction VEX Prefix
  const uint8_t address_rex = src.rex();
  const bool rex_x = address_rex & GET_REX_X;
  const bool rex_b = address_rex & GET_REX_B;
  const bool use_two_byte_vex = !rex_x && !rex_b;
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;  // Only meaningful (and only emitted) in the three-byte form.
  if (use_two_byte_vex) {
    X86_64ManagedRegister no_vvvv = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), no_vvvv, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), rex_x, rex_b, SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
// MOVAPS m128, xmm. Delegates to the VEX-encoded vmovaps when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: optional REX prefix, 0x0F 0x29,
// memory operand.
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitOptionalRex32(src, dst);
    EmitUint8(0x0F);
    EmitUint8(0x29);
    EmitOperand(src.LowBits(), dst);
  } else {
    vmovaps(dst, src);
  }
}
/** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
DCHECK(CpuHasAVXorAVX2FeatureFlag());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
uint8_t ByteZero, ByteOne, ByteTwo;
bool is_twobyte_form = false;
// Instruction VEX Prefix
uint8_t rex = dst.rex();
bool Rex_x = rex & GET_REX_X;
bool Rex_b = rex & GET_REX_B;
if (!Rex_b && !Rex_x) {
is_twobyte_form = true;
}
ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
if (is_twobyte_form) {
X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
vvvv_reg,
SET_VEX_L_128,
SET_VEX_PP_NONE);
} else {
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
Rex_x ,
Rex_b ,
SET_VEX_M_0F);
ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
SET_VEX_L_128,
SET_VEX_PP_NONE);
}
EmitUint8(ByteZero);
EmitUint8(ByteOne);
if (!is_twobyte_form) {
EmitUint8(ByteTwo);
}
// Instruction Opcode
EmitUint8(0x29);
// Instruction Operands
EmitOperand(src.LowBits(), dst);
}
// MOVUPS m128, xmm. Delegates to the VEX-encoded vmovups when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: optional REX prefix, 0x0F 0x11,
// memory operand.
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitOptionalRex32(src, dst);
    EmitUint8(0x0F);
    EmitUint8(0x11);
    EmitOperand(src.LowBits(), dst);
  } else {
    vmovups(dst, src);
  }
}
/** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
DCHECK(CpuHasAVXorAVX2FeatureFlag());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
uint8_t ByteZero, ByteOne, ByteTwo;
bool is_twobyte_form = false;
// Instruction VEX Prefix
uint8_t rex = dst.rex();
bool Rex_x = rex & GET_REX_X;
bool Rex_b = rex & GET_REX_B;
if (!Rex_b && !Rex_x) {
is_twobyte_form = true;
}
ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
if (is_twobyte_form) {
X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
vvvv_reg,
SET_VEX_L_128,
SET_VEX_PP_NONE);
} else {
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
Rex_x,
Rex_b,
SET_VEX_M_0F);
ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
SET_VEX_L_128,
SET_VEX_PP_NONE);
}
EmitUint8(ByteZero);
EmitUint8(ByteOne);
if (!is_twobyte_form) {
EmitUint8(ByteTwo);
}
// Instruction Opcode
EmitUint8(0x11);
// Instruction Operands
EmitOperand(src.LowBits(), dst);
}
// MOVSS xmm, m32: prefix 0xF3, optional REX prefix, 0x0F 0x10, memory operand.
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
// MOVSS m32, xmm: prefix 0xF3, optional REX prefix, 0x0F 0x11, memory operand.
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
// MOVSS xmm, xmm: prefix 0xF3, optional REX prefix, 0x0F 0x11, register operand.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);
  // Movss is MR encoding instead of the usual RM, hence (src, dst) here and below.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
// MOVSXD r64, r32: REX64 prefix, opcode 0x63, register operand.
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
// MOVSXD r64, m32: REX64 prefix, opcode 0x63, memory operand.
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
// Move from a general-purpose register to an XMM register; forwards to the
// three-argument overload with is64bit = true.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  constexpr bool kIs64Bit = true;
  movd(dst, src, kIs64Bit);
}
// Move from an XMM register to a general-purpose register; forwards to the
// three-argument overload with is64bit = true.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  constexpr bool kIs64Bit = true;
  movd(dst, src, kIs64Bit);
}
// Move from a general-purpose register to an XMM register: prefix 0x66,
// optional REX prefix (parameterised by `is64bit` and by whether each register
// needs REX), 0x0F 0x6E, then `src` as the operand.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0x66);
  const bool dst_needs_rex = dst.NeedsRex();
  const bool src_needs_rex = src.NeedsRex();
  EmitOptionalRex(false, is64bit, dst_needs_rex, false, src_needs_rex);
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}
// Move from an XMM register to a general-purpose register: prefix 0x66,
// optional REX prefix (parameterised by `is64bit` and by whether each register
// needs REX), 0x0F 0x7E, then `dst` as the operand.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0x66);
  const bool src_needs_rex = src.NeedsRex();
  const bool dst_needs_rex = dst.NeedsRex();
  EmitOptionalRex(false, is64bit, src_needs_rex, false, dst_needs_rex);
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
// ADDSS xmm, xmm: prefix 0xF3, optional REX prefix, 0x0F 0x58, register operand.
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ADDSS xmm, m32: prefix 0xF3, optional REX prefix, 0x0F 0x58, memory operand.
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
// SUBSS xmm, xmm: prefix 0xF3, optional REX prefix, 0x0F 0x5C, register operand.
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// SUBSS xmm, m32: prefix 0xF3, optional REX prefix, 0x0F 0x5C, memory operand.
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
// MULSS xmm, xmm: prefix 0xF3, optional REX prefix, 0x0F 0x59, register operand.
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MULSS xmm, m32: prefix 0xF3, optional REX prefix, 0x0F 0x59, memory operand.
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
// DIVSS xmm, xmm: prefix 0xF3, optional REX prefix, 0x0F 0x5E, register operand.
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// DIVSS xmm, m32: prefix 0xF3, optional REX prefix, 0x0F 0x5E, memory operand.
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
// ADDPS xmm, xmm: optional REX prefix, 0x0F 0x58, register operand.
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// SUBPS xmm, xmm: optional REX prefix, 0x0F 0x5C, register operand.
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MULPS xmm, xmm: optional REX prefix, 0x0F 0x59, register operand.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// DIVPS xmm, xmm: optional REX prefix, 0x0F 0x5E, register operand.
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  // Two-byte opcode.
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
void X86_64Assembler::flds(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xD9);
EmitOperand(0, src);
}
void X86_64Assembler::fsts(const Address& dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xD9);
EmitOperand(2, dst);
}
void X86_64Assembler::fstps(const Address& dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xD9);
EmitOperand(3, dst);
}
// MOVAPD xmm, xmm. Delegates to the VEX-encoded vmovapd when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: prefix 0x66, optional REX prefix,
// 0x0F 0x28, register operand.
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(0x66);
    EmitOptionalRex32(dst, src);
    EmitUint8(0x0F);
    EmitUint8(0x28);
    EmitXmmRegisterOperand(dst.LowBits(), src);
  } else {
    vmovapd(dst, src);
  }
}
/**
 * VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2
 *
 * The two-byte VEX prefix is used unless both registers need REX. In the
 * two-byte form, when `dst` does not need REX, the store opcode (0x29) is
 * emitted with the operands swapped.
 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  const bool use_two_byte_vex = !(src.NeedsRex() && dst.NeedsRex());

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  const bool load = dst.NeedsRex();
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;  // Only meaningful (and only emitted) in the three-byte form.
  if (use_two_byte_vex) {
    X86_64ManagedRegister no_vvvv = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit;
    if (load) {
      rex_bit = dst.NeedsRex();
    } else {
      rex_bit = src.NeedsRex();
    }
    vex_byte_one = EmitVexPrefixByteOne(rex_bit, no_vvvv, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        /*X=*/ false,
                                        src.NeedsRex(),
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode followed by Instruction Operands
  if (use_two_byte_vex && !load) {
    EmitUint8(0x29);
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitUint8(0x28);
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
// MOVAPD xmm, m128. Delegates to the VEX-encoded vmovapd when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: prefix 0x66, optional REX prefix,
// 0x0F 0x28, memory operand.
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(0x66);
    EmitOptionalRex32(dst, src);
    EmitUint8(0x0F);
    EmitUint8(0x28);
    EmitOperand(dst.LowBits(), src);
  } else {
    vmovapd(dst, src);
  }
}
/**
 * VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128
 *
 * The two-byte VEX prefix is used when the address needs neither REX.X nor
 * REX.B; otherwise the three-byte prefix carries those bits.
 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);

  // Instruction VEX Prefix
  const uint8_t address_rex = src.rex();
  const bool rex_x = address_rex & GET_REX_X;
  const bool rex_b = address_rex & GET_REX_B;
  const bool use_two_byte_vex = !rex_b && !rex_x;
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;  // Only meaningful (and only emitted) in the three-byte form.
  if (use_two_byte_vex) {
    X86_64ManagedRegister no_vvvv = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), no_vvvv, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), rex_x, rex_b, SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
// MOVUPD xmm, m128. Delegates to the VEX-encoded vmovupd when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: prefix 0x66, optional REX prefix,
// 0x0F 0x10, memory operand.
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(0x66);
    EmitOptionalRex32(dst, src);
    EmitUint8(0x0F);
    EmitUint8(0x10);
    EmitOperand(dst.LowBits(), src);
  } else {
    vmovupd(dst, src);
  }
}
/**
 * VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128
 *
 * The two-byte VEX prefix is used when the address needs neither REX.X nor
 * REX.B; otherwise the three-byte prefix carries those bits.
 */
void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);

  // Instruction VEX Prefix
  const uint8_t address_rex = src.rex();
  const bool rex_x = address_rex & GET_REX_X;
  const bool rex_b = address_rex & GET_REX_B;
  const bool use_two_byte_vex = !rex_b && !rex_x;
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;  // Only meaningful (and only emitted) in the three-byte form.
  if (use_two_byte_vex) {
    X86_64ManagedRegister no_vvvv = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), no_vvvv, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), rex_x, rex_b, SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
// MOVAPD m128, xmm. Delegates to the VEX-encoded vmovapd when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: prefix 0x66, optional REX prefix,
// 0x0F 0x29, memory operand.
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(0x66);
    EmitOptionalRex32(src, dst);
    EmitUint8(0x0F);
    EmitUint8(0x29);
    EmitOperand(src.LowBits(), dst);
  } else {
    vmovapd(dst, src);
  }
}
/** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
DCHECK(CpuHasAVXorAVX2FeatureFlag());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
bool is_twobyte_form = false;
uint8_t ByteZero, ByteOne, ByteTwo;
// Instruction VEX Prefix
uint8_t rex = dst.rex();
bool Rex_x = rex & GET_REX_X;
bool Rex_b = rex & GET_REX_B;
if (!Rex_x && !Rex_b) {
is_twobyte_form = true;
}
ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
if (is_twobyte_form) {
X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
vvvv_reg,
SET_VEX_L_128,
SET_VEX_PP_66);
} else {
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
Rex_x,
Rex_b,
SET_VEX_M_0F);
ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
SET_VEX_L_128,
SET_VEX_PP_66);
}
EmitUint8(ByteZero);
EmitUint8(ByteOne);
if (!is_twobyte_form) {
EmitUint8(ByteTwo);
}
// Instruction Opcode
EmitUint8(0x29);
// Instruction Operands
EmitOperand(src.LowBits(), dst);
}
// MOVUPD m128, xmm. Delegates to the VEX-encoded vmovupd when an AVX/AVX2 flag
// is set; otherwise emits the legacy form: prefix 0x66, optional REX prefix,
// 0x0F 0x11, memory operand.
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(0x66);
    EmitOptionalRex32(src, dst);
    EmitUint8(0x0F);
    EmitUint8(0x11);
    EmitOperand(src.LowBits(), dst);
  } else {
    vmovupd(dst, src);
  }
}
/** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
DCHECK(CpuHasAVXorAVX2FeatureFlag());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
bool is_twobyte_form = false;
uint8_t ByteZero, ByteOne, ByteTwo;
// Instruction VEX Prefix
uint8_t rex = dst.rex();
bool Rex_x = rex & GET_REX_X;
bool Rex_b = rex & GET_REX_B;
if (!Rex_x && !Rex_b) {
is_twobyte_form = true;
}
ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
if (is_twobyte_form) {
X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
vvvv_reg,
SET_VEX_L_128,
SET_VEX_PP_66);
} else {
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
Rex_x,
Rex_b,
SET_VEX_M_0F);
ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
SET_VEX_L_128,
SET_VEX_PP_66);
}
EmitUint8(ByteZero);
EmitUint8(ByteOne);
if (!is_twobyte_form) {
EmitUint8(ByteTwo);
}
// Instruction Opcode
EmitUint8(0x11);
// Instruction Operands
EmitOperand(src.LowBits(), dst);
}
// MOVSD xmm, m64: prefix 0xF2, optional REX prefix, 0x0F 0x10, memory operand.
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
// MOVSD m64, xmm: prefix 0xF2, optional REX prefix, 0x0F 0x11, memory operand.
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
// MOVSD xmm, xmm: prefix 0xF2, optional REX prefix, 0x0F 0x11, register operand.
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);
  // Movsd is MR encoding instead of the usual RM, hence (src, dst) here and below.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
// ADDSD xmm, xmm: prefix 0xF2, optional REX prefix, 0x0F 0x58, register operand.
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ADDSD xmm, m64: prefix 0xF2, optional REX prefix, 0x0F 0x58, memory operand.
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
// SUBSD xmm, xmm: prefix 0xF2, optional REX prefix, 0x0F 0x5C, register operand.
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// SUBSD xmm, m64: prefix 0xF2, optional REX prefix, 0x0F 0x5C, memory operand.
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
// MULSD xmm, xmm: prefix 0xF2, optional REX prefix, 0x0F 0x59, register operand.
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MULSD xmm, m64: prefix 0xF2, optional REX prefix, 0x0F 0x59, memory operand.
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix, written ahead of any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
// DIVSD, register-register form: F2, optional REX, 0F 5E, then the register operand.
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// DIVSD, register-memory form: F2, optional REX, 0F 5E, then the memory operand.
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// ADDPD, register-register form: 66, optional REX, 0F 58, then the register operand.
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x58;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// SUBPD, register-register form: 66, optional REX, 0F 5C, then the register operand.
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5C;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MULPD, register-register form: 66, optional REX, 0F 59, then the register operand.
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x59;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// DIVPD, register-register form: 66, optional REX, 0F 5E, then the register operand.
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MOVDQA, register-register form. Delegates to vmovdqa() when the AVX/AVX2
// feature flag is set; otherwise emits 66, optional REX, 0F 6F, register operand.
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    constexpr uint8_t kPrefix = 0x66;
    constexpr uint8_t kEscape = 0x0F;
    constexpr uint8_t kOpcode = 0x6F;
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(kPrefix);
    EmitOptionalRex32(dst, src);
    EmitUint8(kEscape);
    EmitUint8(kOpcode);
    EmitXmmRegisterOperand(dst.LowBits(), src);
  } else {
    vmovdqa(dst, src);
  }
}
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // The three-byte VEX prefix is selected only when both registers need a REX bit.
  const bool use_two_byte_vex = !(src.NeedsRex() && dst.NeedsRex());
  const bool load = dst.NeedsRex();
  // With the two-byte prefix and a dst that needs no REX bit, opcode 7F is
  // emitted with the operand roles swapped; every other case uses opcode 6F.
  const bool use_swapped_form = use_two_byte_vex && !load;

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;
  if (!use_two_byte_vex) {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        /*X=*/ false,
                                        src.NeedsRex(),
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_66);
  } else {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    const bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    vex_byte_one = EmitVexPrefixByteOne(rex_bit,
                                        vvvv_reg,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_66);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode, then Instruction Operands
  if (use_swapped_form) {
    EmitUint8(0x7F);
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitUint8(0x6F);
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
// MOVDQA, load form. Delegates to vmovdqa() when the AVX/AVX2 feature flag is
// set; otherwise emits 66, optional REX, 0F 6F, then the memory operand.
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    constexpr uint8_t kPrefix = 0x66;
    constexpr uint8_t kEscape = 0x0F;
    constexpr uint8_t kOpcode = 0x6F;
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(kPrefix);
    EmitOptionalRex32(dst, src);
    EmitUint8(kEscape);
    EmitUint8(kOpcode);
    EmitOperand(dst.LowBits(), src);
  } else {
    vmovdqa(dst, src);
  }
}
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // The two-byte VEX prefix is used only when the address sets neither REX.X nor REX.B.
  const uint8_t address_rex = src.rex();
  const bool rex_x = (address_rex & GET_REX_X) != 0;
  const bool rex_b = (address_rex & GET_REX_B) != 0;
  const bool use_two_byte_vex = !(rex_x || rex_b);

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;
  if (!use_two_byte_vex) {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        rex_x,
                                        rex_b,
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_66);
  } else {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        vvvv_reg,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_66);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
// MOVDQU, load form. Delegates to vmovdqu() when the AVX/AVX2 feature flag is
// set; otherwise emits F3, optional REX, 0F 6F, then the memory operand.
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    constexpr uint8_t kPrefix = 0xF3;
    constexpr uint8_t kEscape = 0x0F;
    constexpr uint8_t kOpcode = 0x6F;
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(kPrefix);
    EmitOptionalRex32(dst, src);
    EmitUint8(kEscape);
    EmitUint8(kOpcode);
    EmitOperand(dst.LowBits(), src);
  } else {
    vmovdqu(dst, src);
  }
}
/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
Load Unaligned */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // The two-byte VEX prefix is used only when the address sets neither REX.X nor REX.B.
  const uint8_t address_rex = src.rex();
  const bool rex_x = (address_rex & GET_REX_X) != 0;
  const bool rex_b = (address_rex & GET_REX_B) != 0;
  const bool use_two_byte_vex = !(rex_x || rex_b);

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;
  if (!use_two_byte_vex) {
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        rex_x,
                                        rex_b,
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_F3);
  } else {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                        vvvv_reg,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_F3);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
// MOVDQA, store form. Delegates to vmovdqa() when the AVX/AVX2 feature flag is
// set; otherwise emits 66, optional REX, 0F 7F, then the memory operand.
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    constexpr uint8_t kPrefix = 0x66;
    constexpr uint8_t kEscape = 0x0F;
    constexpr uint8_t kOpcode = 0x7F;
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(kPrefix);
    EmitOptionalRex32(src, dst);
    EmitUint8(kEscape);
    EmitUint8(kOpcode);
    EmitOperand(src.LowBits(), dst);
  } else {
    vmovdqa(dst, src);
  }
}
/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // The two-byte VEX prefix is used only when the address sets neither REX.X nor REX.B.
  const uint8_t address_rex = dst.rex();
  const bool rex_x = (address_rex & GET_REX_X) != 0;
  const bool rex_b = (address_rex & GET_REX_B) != 0;
  const bool use_two_byte_vex = !(rex_x || rex_b);

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;
  if (!use_two_byte_vex) {
    vex_byte_one = EmitVexPrefixByteOne(src.NeedsRex(),
                                        rex_x,
                                        rex_b,
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_66);
  } else {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(src.NeedsRex(),
                                        vvvv_reg,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_66);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
// MOVDQU, store form. Delegates to vmovdqu() when the AVX/AVX2 feature flag is
// set; otherwise emits F3, optional REX, 0F 7F, then the memory operand.
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (!CpuHasAVXorAVX2FeatureFlag()) {
    constexpr uint8_t kPrefix = 0xF3;
    constexpr uint8_t kEscape = 0x0F;
    constexpr uint8_t kOpcode = 0x7F;
    AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
    EmitUint8(kPrefix);
    EmitOptionalRex32(src, dst);
    EmitUint8(kEscape);
    EmitUint8(kOpcode);
    EmitOperand(src.LowBits(), dst);
  } else {
    vmovdqu(dst, src);
  }
}
/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  // The two-byte VEX prefix is used only when the address sets neither REX.X nor REX.B.
  const uint8_t address_rex = dst.rex();
  const bool rex_x = (address_rex & GET_REX_X) != 0;
  const bool rex_b = (address_rex & GET_REX_B) != 0;
  const bool use_two_byte_vex = !(rex_b || rex_x);

  // Instruction VEX Prefix
  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(use_two_byte_vex);
  uint8_t vex_byte_one;
  uint8_t vex_byte_two = 0;
  if (!use_two_byte_vex) {
    vex_byte_one = EmitVexPrefixByteOne(src.NeedsRex(),
                                        rex_x,
                                        rex_b,
                                        SET_VEX_M_0F);
    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_F3);
  } else {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    vex_byte_one = EmitVexPrefixByteOne(src.NeedsRex(),
                                        vvvv_reg,
                                        SET_VEX_L_128,
                                        SET_VEX_PP_F3);
  }
  EmitUint8(vex_byte_zero);
  EmitUint8(vex_byte_one);
  if (!use_two_byte_vex) {
    EmitUint8(vex_byte_two);
  }

  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
// PADDB, register-register form: 66, optional REX, 0F FC, then the register operand.
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xFC;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBB, register-register form: 66, optional REX, 0F F8, then the register operand.
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xF8;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDW, register-register form: 66, optional REX, 0F FD, then the register operand.
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xFD;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBW, register-register form: 66, optional REX, 0F F9, then the register operand.
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xF9;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMULLW, register-register form: 66, optional REX, 0F D5, then the register operand.
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xD5;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDD, register-register form: 66, optional REX, 0F FE, then the register operand.
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xFE;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBD, register-register form: 66, optional REX, 0F FA, then the register operand.
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xFA;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMULLD, register-register form: 66, optional REX, 0F 38 40, then the register operand.
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x40;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDQ, register-register form: 66, optional REX, 0F D4, then the register operand.
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xD4;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBQ, register-register form: 66, optional REX, 0F FB, then the register operand.
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xFB;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDUSB, register-register form: 66, optional REX, 0F DC, then the register operand.
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xDC;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDSB, register-register form: 66, optional REX, 0F EC, then the register operand.
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xEC;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDUSW, register-register form: 66, optional REX, 0F DD, then the register operand.
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xDD;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PADDSW, register-register form: 66, optional REX, 0F ED, then the register operand.
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xED;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBUSB, register-register form: 66, optional REX, 0F D8, then the register operand.
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xD8;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBSB, register-register form: 66, optional REX, 0F E8, then the register operand.
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xE8;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBUSW, register-register form: 66, optional REX, 0F D9, then the register operand.
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xD9;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSUBSW, register-register form: 66, optional REX, 0F E9, then the register operand.
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xE9;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Convenience overload: forwards to the three-argument cvtsi2ss with is64bit = false.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, /*is64bit=*/ false);
}
// CVTSI2SS from a CPU register: F3, REX (EmitRex64 when is64bit, otherwise
// the optional 32-bit REX), 0F 2A, then the operand built from src.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  }
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), Operand(src));
}
// CVTSI2SS from memory: F3, REX (EmitRex64 when is64bit, otherwise the
// optional 32-bit REX), 0F 2A, then the memory operand.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  }
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// Convenience overload: forwards to the three-argument cvtsi2sd with is64bit = false.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, /*is64bit=*/ false);
}
// CVTSI2SD from a CPU register: F2, REX (EmitRex64 when is64bit, otherwise
// the optional 32-bit REX), 0F 2A, then the operand built from src.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  }
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), Operand(src));
}
// CVTSI2SD from memory: F2, REX (EmitRex64 when is64bit, otherwise the
// optional 32-bit REX), 0F 2A, then the memory operand.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  }
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// CVTSS2SI (CPU register <- XMM register): F3, optional REX, 0F 2D, then the register operand.
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// CVTSS2SD, register-register form: F3, optional REX, 0F 5A, then the register operand.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// CVTSS2SD, register-memory form: F3, optional REX, 0F 5A, then the memory operand.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// CVTSD2SI (CPU register <- XMM register): F2, optional REX, 0F 2D, then the register operand.
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Convenience overload: forwards to the three-argument cvttss2si with is64bit = false.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, /*is64bit=*/ false);
}
// CVTTSS2SI: F3, REX (EmitRex64 when is64bit, otherwise the optional 32-bit
// REX), 0F 2C, then the register operand.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2C;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  }
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Convenience overload: forwards to the three-argument cvttsd2si with is64bit = false.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, /*is64bit=*/ false);
}
// CVTTSD2SI: F2, REX (EmitRex64 when is64bit, otherwise the optional 32-bit
// REX), 0F 2C, then the register operand.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2C;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  if (!is64bit) {
    EmitOptionalRex32(dst, src);
  } else {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  }
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// CVTSD2SS, register-register form: F2, optional REX, 0F 5A, then the register operand.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// CVTSD2SS, register-memory form: F2, optional REX, 0F 5A, then the memory operand.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// CVTDQ2PS, register-register form: optional REX, 0F 5B, then the register operand.
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5B;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// CVTDQ2PD, register-register form: F3, optional REX, 0F E6, then the register operand.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xE6;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// COMISS, register-register form: optional REX, 0F 2F, then the register operand.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
// COMISS, register-memory form: optional REX, 0F 2F, then the memory operand.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(a.LowBits(), b);
}
// COMISD, register-register form: 66, optional REX, 0F 2F, then the register operand.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
// COMISD, register-memory form: 66, optional REX, 0F 2F, then the memory operand.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(a.LowBits(), b);
}
// UCOMISS, register-register form: optional REX, 0F 2E, then the register operand.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
// UCOMISS, register-memory form: optional REX, 0F 2E, then the memory operand.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(a.LowBits(), b);
}
// UCOMISD, register-register form: 66, optional REX, 0F 2E, then the register operand.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
// UCOMISD, register-memory form: 66, optional REX, 0F 2E, then the memory operand.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x2E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(a, b);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(a.LowBits(), b);
}
// ROUNDSD: 66, optional REX, 0F 3A 0B, the register operand, then imm as one byte.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x3A;
  constexpr uint8_t kOpcode = 0x0B;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
// ROUNDSS: 66, optional REX, 0F 3A 0A, the register operand, then imm as one byte.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x3A;
  constexpr uint8_t kOpcode = 0x0A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
// SQRTSD, register-register form: F2, optional REX, 0F 51, then the register operand.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x51;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// SQRTSS, register-register form: F3, optional REX, 0F 51, then the register operand.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF3;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x51;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// XORPD, register-memory form: 66, optional REX, 0F 57, then the memory operand.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x57;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// XORPD, register-register form: 66, optional REX, 0F 57, then the register operand.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x57;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// XORPS, register-memory form: optional REX, 0F 57, then the memory operand.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x57;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// XORPS, register-register form: optional REX, 0F 57, then the register operand.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x57;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PXOR, register-register form: 66, optional REX, 0F EF, then the register operand.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xEF;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ANDPD, register-memory form: 66, optional REX, 0F 54, then the memory operand.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x54;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitOperand(dst.LowBits(), src);
}
// ANDPD, register-register form: 66, optional REX, 0F 54, then the register operand.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x54;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ANDPS, register-register form: optional REX, 0F 54, then the register operand.
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x54;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PAND, register-register form: 66, optional REX, 0F DB, then the register operand.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xDB;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ANDN on CPU registers: always uses the three-byte VEX prefix (SET_VEX_M_0F_38,
// W set, src1 handed to the third prefix byte), then opcode F2 and the
// (dst, src2) register operand.
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  constexpr uint8_t kOpcode = 0xF2;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  const uint8_t vex_first = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  const uint8_t vex_second = EmitVexPrefixByteOne(dst.NeedsRex(),
                                                  /*X=*/ false,
                                                  src2.NeedsRex(),
                                                  SET_VEX_M_0F_38);
  const uint8_t vex_third =
      EmitVexPrefixByteTwo(/*W=*/ true,
                           X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                           SET_VEX_L_128,
                           SET_VEX_PP_NONE);
  EmitUint8(vex_first);
  EmitUint8(vex_second);
  EmitUint8(vex_third);
  // Opcode field
  EmitUint8(kOpcode);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
}
// ANDNPD, register-register form: 66, optional REX, 0F 55, then the register operand.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x55;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ANDNPS, register-register form: optional REX, 0F 55, then the register operand.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x55;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PANDN, register-register form: 66, optional REX, 0F DF, then the register operand.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xDF;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ORPD, register-register form: 66, optional REX, 0F 56, then the register operand.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x56;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// ORPS, register-register form: optional REX, 0F 56, then the register operand.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x56;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// POR, register-register form: 66, optional REX, 0F EB, then the register operand.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xEB;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PAVGB, register-register form: 66, optional REX, 0F E0, then the register operand.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xE0;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PAVGW, register-register form: 66, optional REX, 0F E3, then the register operand.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xE3;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PSADBW, register-register form: 66, optional REX, 0F F6, then the register operand.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xF6;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMADDWD, register-register form: 66, optional REX, 0F F5, then the register operand.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xF5;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PHADDW, register-register form: 66, optional REX, 0F 38 01, then the register operand.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x01;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PHADDD, register-register form: 66, optional REX, 0F 38 02, then the register operand.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x02;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// HADDPS, register-register form: F2, optional REX, 0F 7C, then the register operand.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x7C;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// HADDPD, register-register form: 66, optional REX, 0F 7C, then the register operand.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x7C;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PHSUBW, register-register form: 66, optional REX, 0F 38 05, then the register operand.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x05;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PHSUBD, register-register form: 66, optional REX, 0F 38 06, then the register operand.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x06;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// HSUBPS, register-register form: F2, optional REX, 0F 7D, then the register operand.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0xF2;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x7D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// HSUBPD, register-register form: 66, optional REX, 0F 7D, then the register operand.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x7D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMINSB, register-register form: 66, optional REX, 0F 38 38, then the register operand.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x38;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMAXSB, register-register form: 66, optional REX, 0F 38 3C, then the register operand.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x3C;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMINSW, register-register form: 66, optional REX, 0F EA, then the register operand.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xEA;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMAXSW, register-register form: 66, optional REX, 0F EE, then the register operand.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xEE;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMINSD, register-register form: 66, optional REX, 0F 38 39, then the register operand.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x39;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMAXSD, register-register form: 66, optional REX, 0F 38 3D, then the register operand.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x3D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMINUB, register-register form: 66, optional REX, 0F DA, then the register operand.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xDA;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMAXUB, register-register form: 66, optional REX, 0F DE, then the register operand.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0xDE;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMINUW, register-register form: 66, optional REX, 0F 38 3A, then the register operand.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x3A;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMAXUW, register-register form: 66, optional REX, 0F 38 3E, then the register operand.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x3E;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMINUD, register-register form: 66, optional REX, 0F 38 3B, then the register operand.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x3B;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PMAXUD, register-register form: 66, optional REX, 0F 38 3F, then the register operand.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x3F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MINPS, register-register form: optional REX, 0F 5D, then the register operand.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MAXPS, register-register form: optional REX, 0F 5F, then the register operand.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MINPD, register-register form: 66, optional REX, 0F 5D, then the register operand.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5D;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// MAXPD, register-register form: 66, optional REX, 0F 5F, then the register operand.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x5F;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PCMPEQB, register-register form: 66, optional REX, 0F 74, then the register operand.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x74;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PCMPEQW, register-register form: 66, optional REX, 0F 75, then the register operand.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x75;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PCMPEQD, register-register form: 66, optional REX, 0F 76, then the register operand.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kOpcode = 0x76;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// PCMPEQQ, register-register form: 66, optional REX, 0F 38 29, then the register operand.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix = 0x66;
  constexpr uint8_t kEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kOpcode = 0x29;
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix);
  EmitOptionalRex32(dst, src);
  EmitUint8(kEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Emits the encoding of `pcmpgtb dst, src`. Byte order: the 66 prefix, then
// whatever EmitOptionalRex32 produces for the register pair, then 0F 64, then
// the operand written by EmitXmmRegisterOperand.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix66 = 0x66;
  constexpr uint8_t kOpcodeEscape = 0x0F;
  constexpr uint8_t kCompareOpcode = 0x64;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix66);  // Goes out ahead of the EmitOptionalRex32 call.
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kCompareOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Emits the encoding of `pcmpgtw dst, src`. Byte order: the 66 prefix, then
// whatever EmitOptionalRex32 produces for the register pair, then 0F 65, then
// the operand written by EmitXmmRegisterOperand.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix66 = 0x66;
  constexpr uint8_t kOpcodeEscape = 0x0F;
  constexpr uint8_t kCompareOpcode = 0x65;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix66);  // Goes out ahead of the EmitOptionalRex32 call.
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kCompareOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Emits the encoding of `pcmpgtd dst, src`. Byte order: the 66 prefix, then
// whatever EmitOptionalRex32 produces for the register pair, then 0F 66, then
// the operand written by EmitXmmRegisterOperand.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix66 = 0x66;
  constexpr uint8_t kOpcodeEscape = 0x0F;
  // Same numeric value as the prefix; here it is the opcode byte itself.
  constexpr uint8_t kCompareOpcode = 0x66;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix66);  // Goes out ahead of the EmitOptionalRex32 call.
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kCompareOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Emits the encoding of `pcmpgtq dst, src`. Byte order: the 66 prefix, then
// whatever EmitOptionalRex32 produces for the register pair, then the
// three-byte opcode 0F 38 37, then the operand written by
// EmitXmmRegisterOperand.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  constexpr uint8_t kPrefix66 = 0x66;
  constexpr uint8_t kOpcodeEscape = 0x0F;
  constexpr uint8_t kSecondEscape = 0x38;
  constexpr uint8_t kCompareOpcode = 0x37;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix66);  // Goes out ahead of the EmitOptionalRex32 call.
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kSecondEscape);
  EmitUint8(kCompareOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
// Emits the encoding of `shufpd dst, src, imm`. Byte order: the 66 prefix,
// then whatever EmitOptionalRex32 produces for the register pair, then 0F C6,
// then the operand written by EmitXmmRegisterOperand, and last a single byte
// taken from imm.value().
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  constexpr uint8_t kPrefix66 = 0x66;
  constexpr uint8_t kOpcodeEscape = 0x0F;
  constexpr uint8_t kShuffleOpcode = 0xC6;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix66);  // Goes out ahead of the EmitOptionalRex32 call.
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kShuffleOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  // The immediate trails the operand and is passed through EmitUint8, so only
  // one byte of it is emitted.
  EmitUint8(imm.value());
}
// Emits the encoding of `shufps dst, src, imm`: whatever prefix
// EmitOptionalRex32 produces for the register pair, the bytes 0F C6, the
// operand written by EmitXmmRegisterOperand, and last a single byte taken
// from imm.value().
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  constexpr uint8_t kOpcodeEscape = 0x0F;
  constexpr uint8_t kShuffleOpcode = 0xC6;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kShuffleOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  // The immediate trails the operand and is passed through EmitUint8, so only
  // one byte of it is emitted.
  EmitUint8(imm.value());
}
// Emits the encoding of `pshufd dst, src, imm`. Byte order: the 66 prefix,
// then whatever EmitOptionalRex32 produces for the register pair, then 0F 70,
// then the operand written by EmitXmmRegisterOperand, and last a single byte
// taken from imm.value().
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  constexpr uint8_t kPrefix66 = 0x66;
  constexpr uint8_t kOpcodeEscape = 0x0F;
  constexpr uint8_t kShuffleOpcode = 0x70;

  // The capacity guard on buffer_ is created before the first byte is emitted.
  AssemblerBuffer::EnsureCapacity capacity_guard(&buffer_);
  EmitUint8(kPrefix66);  // Goes out ahead of the EmitOptionalRex32 call.
  EmitOptionalRex32(dst, src);
  EmitUint8(kOpcodeEscape);
  EmitUint8(kShuffleOpcode);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  // The immediate trails the operand and is passed through EmitUint8, so only
  // one byte of it is emitted.
  EmitUint8(imm.value());
}
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitOptionalRex32(dst, src);
EmitUint8(0x0F);
EmitUint8(0x60);
EmitXmmRegisterOperand(dst.LowBits(), src);