// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_
#define V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
namespace v8 {
namespace internal {
namespace wasm {
namespace liftoff {
// Liftoff Frames.
//
//  slot      Frame
//       +--------------------+---------------------------
//  n+4  | optional padding slot to keep the stack 16 byte aligned.
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | 0xa: WASM          |
//  -2   |     instance       |
//  -----+--------------------+---------------------------
//  -3   |     slot 0         |   ^
//  -4   |     slot 1         |   |
//       |                    | Frame slots
//       |                    |   |
//       |                    |   v
//       | optional padding slot to keep the stack 16 byte aligned.
//  -----+--------------------+  <-- stack ptr (sp)
//
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }
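// Stack slot offsets are positive and grow downwards from fp, so slot
// {offset} lives at [fp - offset]; the instance is spilled at
// [fp - kInstanceOffset], matching the frame layout above.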
inline CPURegister GetRegFromType(const LiftoffRegister& reg, ValueType type) {
switch (type.kind()) {
case ValueType::kI32:
return reg.gp().W();
case ValueType::kI64:
case ValueType::kRef:
case ValueType::kOptRef:
return reg.gp().X();
case ValueType::kF32:
return reg.fp().S();
case ValueType::kF64:
return reg.fp().D();
case ValueType::kS128:
return reg.fp().Q();
default:
UNREACHABLE();
}
}
inline CPURegList PadRegList(RegList list) {
if ((base::bits::CountPopulation(list) & 1) != 0) list |= padreg.bit();
return CPURegList(CPURegister::kRegister, kXRegSizeInBits, list);
}
inline CPURegList PadVRegList(RegList list) {
if ((base::bits::CountPopulation(list) & 1) != 0) list |= fp_scratch.bit();
return CPURegList(CPURegister::kVRegister, kQRegSizeInBits, list);
}
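// Pushes and pops on arm64 must keep sp 16-byte aligned, so register lists are
// padded to an even count: {padreg} fills the odd slot in GP lists,
// {fp_scratch} in FP lists.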
inline CPURegister AcquireByType(UseScratchRegisterScope* temps,
ValueType type) {
switch (type.kind()) {
case ValueType::kI32:
return temps->AcquireW();
case ValueType::kI64:
return temps->AcquireX();
case ValueType::kF32:
return temps->AcquireS();
case ValueType::kF64:
return temps->AcquireD();
default:
UNREACHABLE();
}
}
template <typename T>
inline MemOperand GetMemOp(LiftoffAssembler* assm,
UseScratchRegisterScope* temps, Register addr,
Register offset, T offset_imm) {
if (offset.is_valid()) {
if (offset_imm == 0) return MemOperand(addr.X(), offset.W(), UXTW);
Register tmp = temps->AcquireW();
assm->Add(tmp, offset.W(), offset_imm);
return MemOperand(addr.X(), tmp, UXTW);
}
return MemOperand(addr.X(), offset_imm);
}
enum class ShiftDirection : bool { kLeft, kRight };
enum class ShiftSign : bool { kSigned, kUnsigned };
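// NEON only has variable shifts to the left (Sshl/Ushl); a right shift is done
// by negating the shift amount. The amount is first masked to the lane width,
// matching wasm shift semantics.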
template <ShiftDirection dir, ShiftSign sign = ShiftSign::kSigned>
inline void EmitSimdShift(LiftoffAssembler* assm, VRegister dst, VRegister lhs,
Register rhs, VectorFormat format) {
DCHECK_IMPLIES(dir == ShiftDirection::kLeft, sign == ShiftSign::kSigned);
DCHECK(dst.IsSameFormat(lhs));
DCHECK_EQ(dst.LaneCount(), LaneCountFromFormat(format));
UseScratchRegisterScope temps(assm);
VRegister tmp = temps.AcquireV(format);
Register shift = dst.Is2D() ? temps.AcquireX() : temps.AcquireW();
int mask = LaneSizeInBitsFromFormat(format) - 1;
assm->And(shift, rhs, mask);
assm->Dup(tmp, shift);
if (dir == ShiftDirection::kRight) {
assm->Neg(tmp, tmp);
}
if (sign == ShiftSign::kSigned) {
assm->Sshl(dst, lhs, tmp);
} else {
assm->Ushl(dst, lhs, tmp);
}
}
template <VectorFormat format, ShiftSign sign>
inline void EmitSimdShiftRightImmediate(LiftoffAssembler* assm, VRegister dst,
VRegister lhs, int32_t rhs) {
  // Sshr and Ushr do not allow a shift amount of 0, so check for that here.
int mask = LaneSizeInBitsFromFormat(format) - 1;
int32_t shift = rhs & mask;
if (!shift) {
if (dst != lhs) {
assm->Mov(dst, lhs);
}
return;
}
if (sign == ShiftSign::kSigned) {
assm->Sshr(dst, lhs, rhs & mask);
} else {
assm->Ushr(dst, lhs, rhs & mask);
}
}
inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister src) {
// AnyTrue does not depend on the number of lanes, so we can use V4S for all.
UseScratchRegisterScope scope(assm);
VRegister temp = scope.AcquireV(kFormatS);
assm->Umaxv(temp, src.fp().V4S());
assm->Umov(dst.gp().W(), temp, 0);
assm->Cmp(dst.gp().W(), 0);
assm->Cset(dst.gp().W(), ne);
}
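// AllTrue is the dual of AnyTrue: Uminv reduces with unsigned min, so the
// result is non-zero exactly when every lane is non-zero.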
inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister src, VectorFormat format) {
UseScratchRegisterScope scope(assm);
VRegister temp = scope.AcquireV(ScalarFormatFromFormat(format));
assm->Uminv(temp, VRegister::Create(src.fp().code(), format));
assm->Umov(dst.gp().W(), temp, 0);
assm->Cmp(dst.gp().W(), 0);
assm->Cset(dst.gp().W(), ne);
}
} // namespace liftoff
int LiftoffAssembler::PrepareStackFrame() {
int offset = pc_offset();
InstructionAccurateScope scope(this, 1);
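  // Emit a single placeholder instruction (sub sp, sp, #0); it is patched with
  // the real frame size later, in PatchPrepareStackFrame, once that size is
  // known.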
sub(sp, sp, 0);
return offset;
}
void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
int stack_param_delta) {
UseScratchRegisterScope temps(this);
temps.Exclude(x16, x17);
// This is the previous stack pointer value (before we push the lr and the
  // fp). We need to keep it to authenticate the lr and adjust the new stack
// pointer afterwards.
Add(x16, fp, 16);
// Load the fp and lr of the old frame, they will be pushed in the new frame
// during the actual call.
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
Ldp(fp, x17, MemOperand(fp));
Autib1716();
Mov(lr, x17);
#else
Ldp(fp, lr, MemOperand(fp));
#endif
temps.Include(x17);
Register scratch = temps.AcquireX();
// Shift the whole frame upwards, except for fp and lr.
int slot_count = num_callee_stack_params;
for (int i = slot_count - 1; i >= 0; --i) {
ldr(scratch, MemOperand(sp, i * 8));
str(scratch, MemOperand(x16, (i - stack_param_delta) * 8));
}
// Set the new stack pointer.
Sub(sp, x16, stack_param_delta * 8);
}
void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) {
static_assert(kStackSlotSize == kXRegSize,
"kStackSlotSize must equal kXRegSize");
// The stack pointer is required to be quadword aligned.
// Misalignment will cause a stack alignment fault.
frame_size = RoundUp(frame_size, kQuadWordSizeInBytes);
if (!IsImmAddSub(frame_size)) {
    // Round the frame size up to a whole page to try to fit an add/sub
    // immediate.
frame_size = RoundUp(frame_size, 0x1000);
if (!IsImmAddSub(frame_size)) {
      // Stack greater than 4MB. Because this case is quite improbable, we
      // just fall back to TurboFan.
bailout(kOtherReason, "Stack too big");
return;
}
}
#ifdef USE_SIMULATOR
  // When using the simulator, deal with Liftoff, which allocates the stack
  // before checking it.
  // TODO(arm): Remove this when the stack check mechanism is updated.
if (frame_size > KB / 2) {
bailout(kOtherReason,
"Stack limited to 512 bytes to avoid a bug in StackCheck");
return;
}
#endif
PatchingAssembler patching_assembler(AssemblerOptions{},
buffer_start_ + offset, 1);
#if V8_OS_WIN
if (frame_size > kStackPageSize) {
// Generate OOL code (at the end of the function, where the current
// assembler is pointing) to do the explicit stack limit check (see
// https://docs.microsoft.com/en-us/previous-versions/visualstudio/
// visual-studio-6.0/aa227153(v=vs.60)).
// At the function start, emit a jump to that OOL code (from {offset} to
// {pc_offset()}).
int ool_offset = pc_offset() - offset;
patching_assembler.b(ool_offset >> kInstrSizeLog2);
// Now generate the OOL code.
Claim(frame_size, 1);
// Jump back to the start of the function (from {pc_offset()} to {offset +
// kInstrSize}).
int func_start_offset = offset + kInstrSize - pc_offset();
b(func_start_offset >> kInstrSizeLog2);
return;
}
#endif
patching_assembler.PatchSubSp(frame_size);
}
void LiftoffAssembler::FinishCode() { ForceConstantPoolEmissionWithoutJump(); }
void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kInstanceOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueType type) {
  // TODO(zhin): Unaligned accesses typically take additional cycles; we should
  // do some performance testing to see how big the effect is.
switch (type.kind()) {
case ValueType::kS128:
return type.element_size_bytes();
default:
return kStackSlotSize;
}
}
bool LiftoffAssembler::NeedsAlignment(ValueType type) {
return type.kind() == ValueType::kS128 || type.is_reference_type();
}
void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
RelocInfo::Mode rmode) {
switch (value.type().kind()) {
case ValueType::kI32:
Mov(reg.gp().W(), Immediate(value.to_i32(), rmode));
break;
case ValueType::kI64:
Mov(reg.gp().X(), Immediate(value.to_i64(), rmode));
break;
case ValueType::kF32:
Fmov(reg.fp().S(), value.to_f32_boxed().get_scalar());
break;
case ValueType::kF64:
Fmov(reg.fp().D(), value.to_f64_boxed().get_scalar());
break;
default:
UNREACHABLE();
}
}
void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
DCHECK_LE(0, offset);
Ldr(dst, liftoff::GetInstanceOperand());
DCHECK(size == 4 || size == 8);
if (size == 4) {
Ldr(dst.W(), MemOperand(dst, offset));
} else {
Ldr(dst, MemOperand(dst, offset));
}
}
void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, int offset) {
DCHECK_LE(0, offset);
Ldr(dst, liftoff::GetInstanceOperand());
LoadTaggedPointerField(dst, MemOperand(dst, offset));
}
void LiftoffAssembler::SpillInstance(Register instance) {
Str(instance, liftoff::GetInstanceOperand());
}
void LiftoffAssembler::FillInstanceInto(Register dst) {
Ldr(dst, liftoff::GetInstanceOperand());
}
void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
Register offset_reg,
int32_t offset_imm,
LiftoffRegList pinned) {
UseScratchRegisterScope temps(this);
MemOperand src_op =
liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
LoadTaggedPointerField(dst, src_op);
}
void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
int32_t offset_imm,
LiftoffRegister src,
LiftoffRegList pinned) {
// Store the value.
MemOperand dst_op(dst_addr, offset_imm);
StoreTaggedField(src.gp(), dst_op);
// The write barrier.
Label write_barrier;
Label exit;
CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, eq,
&write_barrier);
b(&exit);
bind(&write_barrier);
JumpIfSmi(src.gp(), &exit);
if (COMPRESS_POINTERS_BOOL) {
DecompressTaggedPointer(src.gp(), src.gp());
}
CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, ne,
&exit);
CallRecordWriteStub(dst_addr, Operand(offset_imm), EMIT_REMEMBERED_SET,
kSaveFPRegs, wasm::WasmCode::kRecordWrite);
bind(&exit);
}
void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
Register offset_reg, uint32_t offset_imm,
LoadType type, LiftoffRegList pinned,
uint32_t* protected_load_pc, bool is_load_mem) {
UseScratchRegisterScope temps(this);
MemOperand src_op =
liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
if (protected_load_pc) *protected_load_pc = pc_offset();
switch (type.value()) {
case LoadType::kI32Load8U:
case LoadType::kI64Load8U:
Ldrb(dst.gp().W(), src_op);
break;
case LoadType::kI32Load8S:
Ldrsb(dst.gp().W(), src_op);
break;
case LoadType::kI64Load8S:
Ldrsb(dst.gp().X(), src_op);
break;
case LoadType::kI32Load16U:
case LoadType::kI64Load16U:
Ldrh(dst.gp().W(), src_op);
break;
case LoadType::kI32Load16S:
Ldrsh(dst.gp().W(), src_op);
break;
case LoadType::kI64Load16S:
Ldrsh(dst.gp().X(), src_op);
break;
case LoadType::kI32Load:
case LoadType::kI64Load32U:
Ldr(dst.gp().W(), src_op);
break;
case LoadType::kI64Load32S:
Ldrsw(dst.gp().X(), src_op);
break;
case LoadType::kI64Load:
Ldr(dst.gp().X(), src_op);
break;
case LoadType::kF32Load:
Ldr(dst.fp().S(), src_op);
break;
case LoadType::kF64Load:
Ldr(dst.fp().D(), src_op);
break;
case LoadType::kS128Load:
Ldr(dst.fp().Q(), src_op);
break;
}
}
void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister src,
StoreType type, LiftoffRegList pinned,
uint32_t* protected_store_pc, bool is_store_mem) {
UseScratchRegisterScope temps(this);
MemOperand dst_op =
liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
if (protected_store_pc) *protected_store_pc = pc_offset();
switch (type.value()) {
case StoreType::kI32Store8:
case StoreType::kI64Store8:
Strb(src.gp().W(), dst_op);
break;
case StoreType::kI32Store16:
case StoreType::kI64Store16:
Strh(src.gp().W(), dst_op);
break;
case StoreType::kI32Store:
case StoreType::kI64Store32:
Str(src.gp().W(), dst_op);
break;
case StoreType::kI64Store:
Str(src.gp().X(), dst_op);
break;
case StoreType::kF32Store:
Str(src.fp().S(), dst_op);
break;
case StoreType::kF64Store:
Str(src.fp().D(), dst_op);
break;
case StoreType::kS128Store:
Str(src.fp().Q(), dst_op);
break;
}
}
namespace liftoff {
#define __ lasm->
inline Register CalculateActualAddress(LiftoffAssembler* lasm,
Register addr_reg, Register offset_reg,
int32_t offset_imm,
Register result_reg) {
DCHECK_NE(offset_reg, no_reg);
DCHECK_NE(addr_reg, no_reg);
__ Add(result_reg, addr_reg, Operand(offset_reg));
if (offset_imm != 0) {
__ Add(result_reg, result_reg, Operand(offset_imm));
}
return result_reg;
}
enum class Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };
inline void AtomicBinop(LiftoffAssembler* lasm, Register dst_addr,
Register offset_reg, uint32_t offset_imm,
LiftoffRegister value, LiftoffRegister result,
StoreType type, Binop op) {
LiftoffRegList pinned =
LiftoffRegList::ForRegs(dst_addr, offset_reg, value, result);
Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
// Make sure that {result} is unique.
Register result_reg = result.gp();
if (result_reg == value.gp() || result_reg == dst_addr ||
result_reg == offset_reg) {
result_reg = __ GetUnusedRegister(kGpReg, pinned).gp();
}
UseScratchRegisterScope temps(lasm);
Register actual_addr = liftoff::CalculateActualAddress(
lasm, dst_addr, offset_reg, offset_imm, temps.AcquireX());
// Allocate an additional {temp} register to hold the result that should be
// stored to memory. Note that {temp} and {store_result} are not allowed to be
// the same register.
Register temp = temps.AcquireX();
Label retry;
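  // Classic LL/SC loop: ldaxr* performs a load-acquire exclusive, stlxr* a
  // store-release exclusive that writes 0 to {store_result} on success and 1
  // if the exclusive monitor was lost, in which case we retry.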
__ Bind(&retry);
switch (type.value()) {
case StoreType::kI64Store8:
case StoreType::kI32Store8:
__ ldaxrb(result_reg.W(), actual_addr);
break;
case StoreType::kI64Store16:
case StoreType::kI32Store16:
__ ldaxrh(result_reg.W(), actual_addr);
break;
case StoreType::kI64Store32:
case StoreType::kI32Store:
__ ldaxr(result_reg.W(), actual_addr);
break;
case StoreType::kI64Store:
__ ldaxr(result_reg.X(), actual_addr);
break;
default:
UNREACHABLE();
}
switch (op) {
case Binop::kAdd:
__ add(temp, result_reg, value.gp());
break;
case Binop::kSub:
__ sub(temp, result_reg, value.gp());
break;
case Binop::kAnd:
__ and_(temp, result_reg, value.gp());
break;
case Binop::kOr:
__ orr(temp, result_reg, value.gp());
break;
case Binop::kXor:
__ eor(temp, result_reg, value.gp());
break;
case Binop::kExchange:
__ mov(temp, value.gp());
break;
}
switch (type.value()) {
case StoreType::kI64Store8:
case StoreType::kI32Store8:
__ stlxrb(store_result.W(), temp.W(), actual_addr);
break;
case StoreType::kI64Store16:
case StoreType::kI32Store16:
__ stlxrh(store_result.W(), temp.W(), actual_addr);
break;
case StoreType::kI64Store32:
case StoreType::kI32Store:
__ stlxr(store_result.W(), temp.W(), actual_addr);
break;
case StoreType::kI64Store:
__ stlxr(store_result.W(), temp.X(), actual_addr);
break;
default:
UNREACHABLE();
}
__ Cbnz(store_result.W(), &retry);
if (result_reg != result.gp()) {
__ mov(result.gp(), result_reg);
}
}
#undef __
} // namespace liftoff
void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
Register offset_reg, uint32_t offset_imm,
LoadType type, LiftoffRegList pinned) {
UseScratchRegisterScope temps(this);
Register src_reg = liftoff::CalculateActualAddress(
this, src_addr, offset_reg, offset_imm, temps.AcquireX());
switch (type.value()) {
case LoadType::kI32Load8U:
case LoadType::kI64Load8U:
Ldarb(dst.gp().W(), src_reg);
return;
case LoadType::kI32Load16U:
case LoadType::kI64Load16U:
Ldarh(dst.gp().W(), src_reg);
return;
case LoadType::kI32Load:
case LoadType::kI64Load32U:
Ldar(dst.gp().W(), src_reg);
return;
case LoadType::kI64Load:
Ldar(dst.gp().X(), src_reg);
return;
default:
UNREACHABLE();
}
}
void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister src,
StoreType type, LiftoffRegList pinned) {
UseScratchRegisterScope temps(this);
Register dst_reg = liftoff::CalculateActualAddress(
this, dst_addr, offset_reg, offset_imm, temps.AcquireX());
switch (type.value()) {
case StoreType::kI64Store8:
case StoreType::kI32Store8:
Stlrb(src.gp().W(), dst_reg);
return;
case StoreType::kI64Store16:
case StoreType::kI32Store16:
Stlrh(src.gp().W(), dst_reg);
return;
case StoreType::kI64Store32:
case StoreType::kI32Store:
Stlr(src.gp().W(), dst_reg);
return;
case StoreType::kI64Store:
Stlr(src.gp().X(), dst_reg);
return;
default:
UNREACHABLE();
}
}
void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
type, liftoff::Binop::kAdd);
}
void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
type, liftoff::Binop::kSub);
}
void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
type, liftoff::Binop::kAnd);
}
void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
type, liftoff::Binop::kOr);
}
void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister value,
LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
type, liftoff::Binop::kXor);
}
void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
uint32_t offset_imm,
LiftoffRegister value,
LiftoffRegister result, StoreType type) {
liftoff::AtomicBinop(this, dst_addr, offset_reg, offset_imm, value, result,
type, liftoff::Binop::kExchange);
}
void LiftoffAssembler::AtomicCompareExchange(
Register dst_addr, Register offset_reg, uint32_t offset_imm,
LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
StoreType type) {
LiftoffRegList pinned =
LiftoffRegList::ForRegs(dst_addr, offset_reg, expected, new_value);
Register result_reg = result.gp();
if (pinned.has(result)) {
result_reg = GetUnusedRegister(kGpReg, pinned).gp();
}
UseScratchRegisterScope temps(this);
Register actual_addr = liftoff::CalculateActualAddress(
this, dst_addr, offset_reg, offset_imm, temps.AcquireX());
Register store_result = temps.AcquireW();
Label retry;
Label done;
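  // Compare-and-exchange via an LL/SC loop as well: load-acquire exclusive the
  // old value, bail out if it differs from {expected}, otherwise attempt the
  // store-release exclusive and retry on failure.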
Bind(&retry);
switch (type.value()) {
case StoreType::kI64Store8:
case StoreType::kI32Store8:
ldaxrb(result_reg.W(), actual_addr);
      Cmp(result_reg.W(), Operand(expected.gp().W(), UXTB));
B(ne, &done);
stlxrb(store_result.W(), new_value.gp().W(), actual_addr);
break;
case StoreType::kI64Store16:
case StoreType::kI32Store16:
ldaxrh(result_reg.W(), actual_addr);
      Cmp(result_reg.W(), Operand(expected.gp().W(), UXTH));
B(ne, &done);
stlxrh(store_result.W(), new_value.gp().W(), actual_addr);
break;
case StoreType::kI64Store32:
case StoreType::kI32Store:
ldaxr(result_reg.W(), actual_addr);
      Cmp(result_reg.W(), Operand(expected.gp().W(), UXTW));
B(ne, &done);
stlxr(store_result.W(), new_value.gp().W(), actual_addr);
break;
case StoreType::kI64Store:
ldaxr(result_reg.X(), actual_addr);
      Cmp(result_reg.X(), Operand(expected.gp().X(), UXTX));
B(ne, &done);
stlxr(store_result.W(), new_value.gp().X(), actual_addr);
break;
default:
UNREACHABLE();
}
Cbnz(store_result.W(), &retry);
Bind(&done);
if (result_reg != result.gp()) {
mov(result.gp(), result_reg);
}
}
void LiftoffAssembler::AtomicFence() { Dmb(InnerShareable, BarrierAll); }
void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
uint32_t caller_slot_idx,
ValueType type) {
int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
Ldr(liftoff::GetRegFromType(dst, type), MemOperand(fp, offset));
}
void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
uint32_t caller_slot_idx,
ValueType type) {
int32_t offset = (caller_slot_idx + 1) * LiftoffAssembler::kStackSlotSize;
Str(liftoff::GetRegFromType(src, type), MemOperand(fp, offset));
}
void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
ValueType type) {
Ldr(liftoff::GetRegFromType(dst, type), MemOperand(sp, offset));
}
void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
ValueType type) {
UseScratchRegisterScope temps(this);
CPURegister scratch = liftoff::AcquireByType(&temps, type);
Ldr(scratch, liftoff::GetStackSlot(src_offset));
Str(scratch, liftoff::GetStackSlot(dst_offset));
}
void LiftoffAssembler::Move(Register dst, Register src, ValueType type) {
if (type == kWasmI32) {
Mov(dst.W(), src.W());
} else {
DCHECK(kWasmI64 == type || type.is_reference_type());
Mov(dst.X(), src.X());
}
}
void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
ValueType type) {
if (type == kWasmF32) {
Fmov(dst.S(), src.S());
} else if (type == kWasmF64) {
Fmov(dst.D(), src.D());
} else {
DCHECK_EQ(kWasmS128, type);
Mov(dst.Q(), src.Q());
}
}
void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) {
RecordUsedSpillOffset(offset);
MemOperand dst = liftoff::GetStackSlot(offset);
Str(liftoff::GetRegFromType(reg, type), dst);
}
void LiftoffAssembler::Spill(int offset, WasmValue value) {
RecordUsedSpillOffset(offset);
MemOperand dst = liftoff::GetStackSlot(offset);
UseScratchRegisterScope temps(this);
CPURegister src = CPURegister::no_reg();
switch (value.type().kind()) {
case ValueType::kI32:
if (value.to_i32() == 0) {
src = wzr;
} else {
src = temps.AcquireW();
Mov(src.W(), value.to_i32());
}
break;
case ValueType::kI64:
if (value.to_i64() == 0) {
src = xzr;
} else {
src = temps.AcquireX();
Mov(src.X(), value.to_i64());
}
break;
default:
// We do not track f32 and f64 constants, hence they are unreachable.
UNREACHABLE();
}
Str(src, dst);
}
void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) {
MemOperand src = liftoff::GetStackSlot(offset);
Ldr(liftoff::GetRegFromType(reg, type), src);
}
void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
UNREACHABLE();
}
void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
// Zero 'size' bytes *below* start, byte at offset 'start' is untouched.
DCHECK_LE(0, start);
DCHECK_LT(0, size);
DCHECK_EQ(0, size % 4);
RecordUsedSpillOffset(start + size);
int max_stp_offset = -start - size;
  // We check IsImmLSUnscaled(-start-12) because str only allows an unscaled
  // 9-bit immediate offset [-256, 255]. If start is large enough, which can
  // happen when a function has many params (>= 32 i64), str cannot encode the
  // offset. The Str macro could, but it would generate more instructions, so
  // fall back to the general case below instead.
if (size <= 12 * kStackSlotSize &&
IsImmLSPair(max_stp_offset, kXRegSizeLog2) &&
IsImmLSUnscaled(-start - 12)) {
// Special straight-line code for up to 12 slots. Generates one
// instruction per two slots (<= 7 instructions total).
STATIC_ASSERT(kStackSlotSize == kSystemPointerSize);
uint32_t remainder = size;
for (; remainder >= 2 * kStackSlotSize; remainder -= 2 * kStackSlotSize) {
stp(xzr, xzr, liftoff::GetStackSlot(start + remainder));
}
DCHECK_GE(12, remainder);
switch (remainder) {
case 12:
str(xzr, liftoff::GetStackSlot(start + remainder));
str(wzr, liftoff::GetStackSlot(start + remainder - 8));
break;
case 8:
str(xzr, liftoff::GetStackSlot(start + remainder));
break;
case 4:
str(wzr, liftoff::GetStackSlot(start + remainder));
break;
case 0:
break;
default:
UNREACHABLE();
}
} else {
// General case for bigger counts (5-8 instructions).
UseScratchRegisterScope temps(this);
Register address_reg = temps.AcquireX();
// This {Sub} might use another temp register if the offset is too large.
Sub(address_reg, fp, start + size);
Register count_reg = temps.AcquireX();
Mov(count_reg, size / 4);
Label loop;
bind(&loop);
sub(count_reg, count_reg, 1);
str(wzr, MemOperand(address_reg, kSystemPointerSize / 2, PostIndex));
cbnz(count_reg, &loop);
}
}
#define I32_BINOP(name, instruction) \
void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
Register rhs) { \
instruction(dst.W(), lhs.W(), rhs.W()); \
}
#define I32_BINOP_I(name, instruction) \
I32_BINOP(name, instruction) \
void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
int32_t imm) { \
instruction(dst.W(), lhs.W(), Immediate(imm)); \
}
#define I64_BINOP(name, instruction) \
void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister lhs, \
LiftoffRegister rhs) { \
instruction(dst.gp().X(), lhs.gp().X(), rhs.gp().X()); \
}
#define I64_BINOP_I(name, instruction) \
I64_BINOP(name, instruction) \
void LiftoffAssembler::emit_##name##i(LiftoffRegister dst, \
LiftoffRegister lhs, int32_t imm) { \
instruction(dst.gp().X(), lhs.gp().X(), imm); \
}
#define FP32_BINOP(name, instruction) \
void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
DoubleRegister rhs) { \
instruction(dst.S(), lhs.S(), rhs.S()); \
}
#define FP32_UNOP(name, instruction) \
void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
instruction(dst.S(), src.S()); \
}
#define FP32_UNOP_RETURN_TRUE(name, instruction) \
bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
instruction(dst.S(), src.S()); \
return true; \
}
#define FP64_BINOP(name, instruction) \
void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
DoubleRegister rhs) { \
instruction(dst.D(), lhs.D(), rhs.D()); \
}
#define FP64_UNOP(name, instruction) \
void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
instruction(dst.D(), src.D()); \
}
#define FP64_UNOP_RETURN_TRUE(name, instruction) \
bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
instruction(dst.D(), src.D()); \
return true; \
}
#define I32_SHIFTOP(name, instruction) \
void LiftoffAssembler::emit_##name(Register dst, Register src, \
Register amount) { \
instruction(dst.W(), src.W(), amount.W()); \
} \
void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
int32_t amount) { \
instruction(dst.W(), src.W(), amount & 31); \
}
#define I64_SHIFTOP(name, instruction) \
void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
Register amount) { \
instruction(dst.gp().X(), src.gp().X(), amount.X()); \
} \
void LiftoffAssembler::emit_##name##i(LiftoffRegister dst, \
LiftoffRegister src, int32_t amount) { \
instruction(dst.gp().X(), src.gp().X(), amount & 63); \
}
I32_BINOP_I(i32_add, Add)
I32_BINOP(i32_sub, Sub)
I32_BINOP(i32_mul, Mul)
I32_BINOP_I(i32_and, And)
I32_BINOP_I(i32_or, Orr)
I32_BINOP_I(i32_xor, Eor)
I32_SHIFTOP(i32_shl, Lsl)
I32_SHIFTOP(i32_sar, Asr)
I32_SHIFTOP(i32_shr, Lsr)
I64_BINOP_I(i64_add, Add)
I64_BINOP(i64_sub, Sub)
I64_BINOP(i64_mul, Mul)
I64_BINOP_I(i64_and, And)
I64_BINOP_I(i64_or, Orr)
I64_BINOP_I(i64_xor, Eor)
I64_SHIFTOP(i64_shl, Lsl)
I64_SHIFTOP(i64_sar, Asr)
I64_SHIFTOP(i64_shr, Lsr)
FP32_BINOP(f32_add, Fadd)
FP32_BINOP(f32_sub, Fsub)
FP32_BINOP(f32_mul, Fmul)
FP32_BINOP(f32_div, Fdiv)
FP32_BINOP(f32_min, Fmin)
FP32_BINOP(f32_max, Fmax)
FP32_UNOP(f32_abs, Fabs)
FP32_UNOP(f32_neg, Fneg)
FP32_UNOP_RETURN_TRUE(f32_ceil, Frintp)
FP32_UNOP_RETURN_TRUE(f32_floor, Frintm)
FP32_UNOP_RETURN_TRUE(f32_trunc, Frintz)
FP32_UNOP_RETURN_TRUE(f32_nearest_int, Frintn)
FP32_UNOP(f32_sqrt, Fsqrt)
FP64_BINOP(f64_add, Fadd)
FP64_BINOP(f64_sub, Fsub)
FP64_BINOP(f64_mul, Fmul)
FP64_BINOP(f64_div, Fdiv)
FP64_BINOP(f64_min, Fmin)
FP64_BINOP(f64_max, Fmax)
FP64_UNOP(f64_abs, Fabs)
FP64_UNOP(f64_neg, Fneg)
FP64_UNOP_RETURN_TRUE(f64_ceil, Frintp)
FP64_UNOP_RETURN_TRUE(f64_floor, Frintm)
FP64_UNOP_RETURN_TRUE(f64_trunc, Frintz)
FP64_UNOP_RETURN_TRUE(f64_nearest_int, Frintn)
FP64_UNOP(f64_sqrt, Fsqrt)
#undef I32_BINOP
#undef I64_BINOP
#undef FP32_BINOP
#undef FP32_UNOP
#undef FP64_BINOP
#undef FP64_UNOP
#undef FP64_UNOP_RETURN_TRUE
#undef I32_SHIFTOP
#undef I64_SHIFTOP
void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
Clz(dst.W(), src.W());
}
void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
Rbit(dst.W(), src.W());
Clz(dst.W(), dst.W());
}
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
UseScratchRegisterScope temps(this);
VRegister scratch = temps.AcquireV(kFormat8B);
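  // There is no popcnt instruction on general-purpose registers, so go through
  // NEON: Cnt counts the set bits per byte and Addv sums the byte counts.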
Fmov(scratch.S(), src.W());
Cnt(scratch, scratch);
Addv(scratch.B(), scratch);
Fmov(dst.W(), scratch.S());
return true;
}
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
Clz(dst.gp().X(), src.gp().X());
}
void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
Rbit(dst.gp().X(), src.gp().X());
Clz(dst.gp().X(), dst.gp().X());
}
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
LiftoffRegister src) {
UseScratchRegisterScope temps(this);
VRegister scratch = temps.AcquireV(kFormat8B);
Fmov(scratch.D(), src.gp().X());
Cnt(scratch, scratch);
Addv(scratch.B(), scratch);
Fmov(dst.gp().X(), scratch.D());
return true;
}
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
Label* trap_div_by_zero,
Label* trap_div_unrepresentable) {
Register dst_w = dst.W();
Register lhs_w = lhs.W();
Register rhs_w = rhs.W();
bool can_use_dst = !dst_w.Aliases(lhs_w) && !dst_w.Aliases(rhs_w);
if (can_use_dst) {
// Do div early.
Sdiv(dst_w, lhs_w, rhs_w);
}
// Check for division by zero.
Cbz(rhs_w, trap_div_by_zero);
// Check for kMinInt / -1. This is unrepresentable.
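  // The Ccmp below only performs the {lhs} comparison if {rhs} == -1;
  // otherwise it sets the flags to NoFlag (V clear). Comparing {lhs} with 1
  // overflows (V set) exactly for kMinInt, so B(vs) traps precisely on
  // kMinInt / -1.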
Cmp(rhs_w, -1);
Ccmp(lhs_w, 1, NoFlag, eq);
B(trap_div_unrepresentable, vs);
if (!can_use_dst) {
// Do div.
Sdiv(dst_w, lhs_w, rhs_w);
}
}
void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
Label* trap_div_by_zero) {
// Check for division by zero.
Cbz(rhs.W(), trap_div_by_zero);
// Do div.
Udiv(dst.W(), lhs.W(), rhs.W());
}
void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
Label* trap_div_by_zero) {
Register dst_w = dst.W();
Register lhs_w = lhs.W();
Register rhs_w = rhs.W();
// Do early div.
  // No need to check for kMinInt / -1: Sdiv yields kMinInt, kMinInt * -1 wraps
  // back to kMinInt, so the Msub result (lhs - quotient * rhs) is 0, which is
  // the correct remainder.
UseScratchRegisterScope temps(this);
Register scratch = temps.AcquireW();
Sdiv(scratch, lhs_w, rhs_w);
// Check for division by zero.
Cbz(rhs_w, trap_div_by_zero);
// Compute remainder.
Msub(dst_w, scratch, rhs_w, lhs_w);
}
void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
Label* trap_div_by_zero) {
Register dst_w = dst.W();
Register lhs_w = lhs.W();
Register rhs_w = rhs.W();
// Do early div.
UseScratchRegisterScope temps(this);
Register scratch = temps.AcquireW();
Udiv(scratch, lhs_w, rhs_w);
// Check for division by zero.
Cbz(rhs_w, trap_div_by_zero);
// Compute remainder.
Msub(dst_w, scratch, rhs_w, lhs_w);
}
bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs,
Label* trap_div_by_zero,
Label* trap_div_unrepresentable) {
Register dst_x = dst.gp().X();
Register lhs_x = lhs.gp().X();
Register rhs_x = rhs.gp().X();
bool can_use_dst = !dst_x.Aliases(lhs_x) && !dst_x.Aliases(rhs_x);
if (can_use_dst) {
// Do div early.
Sdiv(dst_x, lhs_x, rhs_x);
}
// Check for division by zero.
Cbz(rhs_x, trap_div_by_zero);
// Check for kMinInt / -1. This is unrepresentable.
Cmp(rhs_x, -1);
Ccmp(lhs_x, 1, NoFlag, eq);
B(trap_div_unrepresentable, vs);
if (!can_use_dst) {
// Do div.
Sdiv(dst_x, lhs_x, rhs_x);
}
return true;
}
bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs,
Label* trap_div_by_zero) {
// Check for division by zero.
Cbz(rhs.gp().X(), trap_div_by_zero);
// Do div.
Udiv(dst.gp().X(), lhs.gp().X(), rhs.gp().X());
return true;
}
bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs,
Label* trap_div_by_zero) {
Register dst_x = dst.gp().X();
Register lhs_x = lhs.gp().X();
Register rhs_x = rhs.gp().X();
// Do early div.
  // No need to check for kMinInt / -1: Sdiv yields kMinInt, kMinInt * -1 wraps
  // back to kMinInt, so the Msub result (lhs - quotient * rhs) is 0, which is
  // the correct remainder.
UseScratchRegisterScope temps(this);
Register scratch = temps.AcquireX();
Sdiv(scratch, lhs_x, rhs_x);
// Check for division by zero.
Cbz(rhs_x, trap_div_by_zero);
// Compute remainder.
Msub(dst_x, scratch, rhs_x, lhs_x);
return true;
}
bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs,
Label* trap_div_by_zero) {
Register dst_x = dst.gp().X();
Register lhs_x = lhs.gp().X();
Register rhs_x = rhs.gp().X();
// Do early div.
UseScratchRegisterScope temps(this);
Register scratch = temps.AcquireX();
Udiv(scratch, lhs_x, rhs_x);
// Check for division by zero.
Cbz(rhs_x, trap_div_by_zero);
// Compute remainder.
Msub(dst_x, scratch, rhs_x, lhs_x);
return true;
}
void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) {
Uxtw(dst, src);
}
void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
UseScratchRegisterScope temps(this);
DoubleRegister scratch = temps.AcquireD();
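  // Ushr isolates the sign bit of {rhs} into bit 0 of the scratch lane; Sli
  // then shifts it back up by 31 and inserts it into {dst}, leaving the lower
  // 31 bits (the magnitude of {lhs}) untouched.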
Ushr(scratch.V2S(), rhs.V2S(), 31);
if (dst != lhs) {
Fmov(dst.S(), lhs.S());
}
Sli(dst.V2S(), scratch.V2S(), 31);
}
void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
UseScratchRegisterScope temps(this);
DoubleRegister scratch = temps.AcquireD();
Ushr(scratch.V1D(), rhs.V1D(), 63);
if (dst != lhs) {
Fmov(dst.D(), lhs.D());
}
Sli(dst.V1D(), scratch.V1D(), 63);
}
bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
LiftoffRegister dst,
LiftoffRegister src, Label* trap) {
switch (opcode) {
case kExprI32ConvertI64:
if (src != dst) Mov(dst.gp().W(), src.gp().W());
return true;
case kExprI32SConvertF32:
Fcvtzs(dst.gp().W(), src.fp().S()); // f32 -> i32 round to zero.
// Check underflow and NaN.
Fcmp(src.fp().S(), static_cast<float>(INT32_MIN));
// Check overflow.
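      // If the input was NaN or below INT32_MIN, {ge} is false and the Ccmp
      // below just forces the V flag. Otherwise V is set only if {dst} + 1
      // overflows, i.e. if Fcvtzs saturated to INT32_MAX (which no in-range
      // f32 can produce, since INT32_MAX is not representable as a float).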
Ccmp(dst.gp().W(), -1, VFlag, ge);
B(trap, vs);
return true;
case kExprI32UConvertF32:
Fcvtzu(dst.gp().W(), src.fp().S()); // f32 -> i32 round to zero.
// Check underflow and NaN.
Fcmp(src.fp().S(), -1.0);
// Check overflow.
Ccmp(dst.gp().W(), -1, ZFlag, gt);
B(trap, eq);
return true;
case kExprI32SConvertF64: {
      // INT32_MIN and INT32_MAX are valid results, so we cannot test the
      // result to detect overflow. We could have done two immediate floating
      // point comparisons, but that would have generated two conditional
      // branches.
UseScratchRegisterScope temps(this);
VRegister fp_ref = temps.AcquireD();
VRegister fp_cmp = temps.AcquireD();
Fcvtzs(dst.gp().W(), src.fp().D()); // f64 -> i32 round to zero.
Frintz(fp_ref, src.fp().D()); // f64 -> f64 round to zero.
Scvtf(fp_cmp, dst.gp().W()); // i32 -> f64.
// If comparison fails, we have an overflow or a NaN.
Fcmp(fp_cmp, fp_ref);
B(trap, ne);
return true;
}
case kExprI32UConvertF64: {
      // INT32_MAX is a valid result, so we cannot test the result to detect
      // overflow. We could have done two immediate floating point comparisons,
      // but that would have generated two conditional branches.
UseScratchRegisterScope temps(this);
VRegister fp_ref = temps.AcquireD();
VRegister fp_cmp = temps.AcquireD();
Fcvtzu(dst.gp().W(), src.fp().D()); // f64 -> i32 round to zero.
Frintz(fp_ref, src.fp().D()); // f64 -> f64 round to zero.
Ucvtf(fp_cmp, dst.gp().W()); // i32 -> f64.
// If comparison fails, we have an overflow or a NaN.
Fcmp(fp_cmp, fp_ref);
B(trap, ne);
return true;
}
case kExprI32SConvertSatF32:
Fcvtzs(dst.gp().W(), src.fp().S());
return true;
case kExprI32UConvertSatF32:
Fcvtzu(dst.gp().W(), src.fp().S());
return true;
case kExprI32SConvertSatF64:
Fcvtzs(dst.gp().W(), src.fp().D());
return true;
case kExprI32UConvertSatF64:
Fcvtzu(dst.gp().W(), src.fp().D());
return true;
case kExprI64SConvertSatF32:
Fcvtzs(dst.gp().X(), src.fp().S());
return true;
case kExprI64UConvertSatF32:
Fcvtzu(dst.gp().X(), src.fp().S());
return true;
case kExprI64SConvertSatF64:
Fcvtzs(dst.gp().X(), src.fp().D());
return true;
case kExprI64UConvertSatF64:
Fcvtzu(dst.gp().X(), src.fp().D());
return true;
case kExprI32ReinterpretF32:
Fmov(dst.gp().W(), src.fp().S());
return true;
case kExprI64SConvertI32:
Sxtw(dst.gp().X(), src.gp().W());
return true;
case kExprI64SConvertF32:
Fcvtzs(dst.gp().X(), src.fp().S()); // f32 -> i64 round to zero.
// Check underflow and NaN.
Fcmp(src.fp().S(), static_cast<float>(INT64_MIN));
// Check overflow.
Ccmp(dst.gp().X(), -1, VFlag, ge);
B(trap, vs);
return true;
case kExprI64UConvertF32:
Fcvtzu(dst.gp().X(), src.fp().S()); // f32 -> i64 round to zero.
// Check underflow and NaN.
Fcmp(src.fp().S(), -1.0);
// Check overflow.
Ccmp(dst.gp().X(), -1, ZFlag, gt);
B(trap, eq);
return true;
case kExprI64SConvertF64:
Fcvtzs(dst.gp().X(), src.fp().D()); // f64 -> i64 round to zero.
// Check underflow and NaN.
      Fcmp(src.fp().D(), static_cast<double>(INT64_MIN));
// Check overflow.
Ccmp(dst.gp().X(), -1, VFlag, ge);
B(trap, vs);
return true;
case kExprI64UConvertF64:
Fcvtzu(dst.gp().X(), src.fp().D()); // f64 -> i64 round to zero.
// Check underflow and NaN.
Fcmp(src.fp().D(), -1.0);
// Check overflow.
Ccmp(dst.gp().X(), -1, ZFlag, gt);
B(trap, eq);
return true;
case kExprI64UConvertI32:
Mov(dst.gp().W(), src.gp().W());
return true;
case kExprI64ReinterpretF64:
Fmov(dst.gp().X(), src.fp().D());
return true;
case kExprF32SConvertI32:
Scvtf(dst.fp().S(), src.gp().W());
return true;
case kExprF32UConvertI32:
Ucvtf(dst.fp().S(), src.gp().W());
return true;
case kExprF32SConvertI64:
Scvtf(dst.fp().S(), src.gp().X());
return true;
case kExprF32UConvertI64:
Ucvtf(dst.fp().S(), src.gp().X());
return true;
case kExprF32ConvertF64:
Fcvt(dst.fp().S(), src.fp().D());
return true;
case kExprF32ReinterpretI32:
Fmov(dst.fp().S(), src.gp().W());
return true;
case kExprF64SConvertI32:
Scvtf(dst.fp().D(), src.gp().W());
return true;
case kExprF64UConvertI32:
Ucvtf(dst.fp().D(), src.gp().W());
return true;
case kExprF64SConvertI64:
Scvtf(dst.fp().D(), src.gp().X());
return true;
case kExprF64UConvertI64:
Ucvtf(dst.fp().D(), src.gp().X());
return true;
case kExprF64ConvertF32:
Fcvt(dst.fp().D(), src.fp().S());
return true;
case kExprF64ReinterpretI64:
Fmov(dst.fp().D(), src.gp().X());
return true;
default:
UNREACHABLE();
}
}
void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
sxtb(dst, src);
}
void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
sxth(dst, src);
}
void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
LiftoffRegister src) {
sxtb(dst.gp(), src.gp());
}
void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
LiftoffRegister src) {
sxth(dst.gp(), src.gp());
}
void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
LiftoffRegister src) {
sxtw(dst.gp(), src.gp());
}
void LiftoffAssembler::emit_jump(Label* label) { B(label); }
void LiftoffAssembler::emit_jump(Register target) { Br(target); }
void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
ValueType type, Register lhs,
Register rhs) {
switch (type.kind()) {
case ValueType::kI32:
if (rhs.is_valid()) {
Cmp(lhs.W(), rhs.W());
} else {
Cmp(lhs.W(), wzr);
}
break;
case ValueType::kI64:
if (rhs.is_valid()) {
Cmp(lhs.X(), rhs.X());
} else {
Cmp(lhs.X(), xzr);
}
break;
default:
UNREACHABLE();
}
B(label, cond);
}
void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
Cmp(src.W(), wzr);
Cset(dst.W(), eq);
}
void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst,
Register lhs, Register rhs) {
Cmp(lhs.W(), rhs.W());
Cset(dst.W(), cond);
}
void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
Cmp(src.gp().X(), xzr);
Cset(dst.W(), eq);
}
void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmp(lhs.gp().X(), rhs.gp().X());
Cset(dst.W(), cond);
}
void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
DoubleRegister lhs,
DoubleRegister rhs) {
Fcmp(lhs.S(), rhs.S());
Cset(dst.W(), cond);
if (cond != ne) {
    // If the V flag is set, at least one of the arguments was a NaN -> false.
Csel(dst.W(), wzr, dst.W(), vs);
}
}
void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
DoubleRegister lhs,
DoubleRegister rhs) {
Fcmp(lhs.D(), rhs.D());
Cset(dst.W(), cond);
if (cond != ne) {
    // If the V flag is set, at least one of the arguments was a NaN -> false.
Csel(dst.W(), wzr, dst.W(), vs);
}
}
bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
LiftoffRegister true_value,
LiftoffRegister false_value,
ValueType type) {
return false;
}
void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
Register offset_reg, uint32_t offset_imm,
LoadType type,
LoadTransformationKind transform,
uint32_t* protected_load_pc) {
UseScratchRegisterScope temps(this);
MemOperand src_op =
liftoff::GetMemOp(this, &temps, src_addr, offset_reg, offset_imm);
*protected_load_pc = pc_offset();
MachineType memtype = type.mem_type();
if (transform == LoadTransformationKind::kExtend) {
if (memtype == MachineType::Int8()) {
Ldr(dst.fp().D(), src_op);
Sxtl(dst.fp().V8H(), dst.fp().V8B());
} else if (memtype == MachineType::Uint8()) {
Ldr(dst.fp().D(), src_op);
Uxtl(dst.fp().V8H(), dst.fp().V8B());
} else if (memtype == MachineType::Int16()) {
Ldr(dst.fp().D(), src_op);
Sxtl(dst.fp().V4S(), dst.fp().V4H());
} else if (memtype == MachineType::Uint16()) {
Ldr(dst.fp().D(), src_op);
Uxtl(dst.fp().V4S(), dst.fp().V4H());
} else if (memtype == MachineType::Int32()) {
Ldr(dst.fp().D(), src_op);
Sxtl(dst.fp().V2D(), dst.fp().V2S());
} else if (memtype == MachineType::Uint32()) {
Ldr(dst.fp().D(), src_op);
Uxtl(dst.fp().V2D(), dst.fp().V2S());
}
} else if (transform == LoadTransformationKind::kZeroExtend) {
if (memtype == MachineType::Int32()) {
Ldr(dst.fp().S(), src_op);
} else {
DCHECK_EQ(MachineType::Int64(), memtype);
Ldr(dst.fp().D(), src_op);
}
} else {
// ld1r only allows no offset or post-index, so emit an add.
DCHECK_EQ(LoadTransformationKind::kSplat, transform);
if (src_op.IsRegisterOffset()) {
      // We have two temp GP registers, so it's okay to acquire one more here;
      // it actually doesn't matter if we acquire the same one.
Register tmp = temps.AcquireX();
Add(tmp, src_op.base(), src_op.regoffset().X());
src_op = MemOperand(tmp.X(), 0);
} else if (src_op.IsImmediateOffset() && src_op.offset() != 0) {
Register tmp = temps.AcquireX();
Add(tmp, src_op.base(), src_op.offset());
src_op = MemOperand(tmp.X(), 0);
}
if (memtype == MachineType::Int8()) {
ld1r(dst.fp().V16B(), src_op);
} else if (memtype == MachineType::Int16()) {
ld1r(dst.fp().V8H(), src_op);
} else if (memtype == MachineType::Int32()) {
ld1r(dst.fp().V4S(), src_op);
} else if (memtype == MachineType::Int64()) {
ld1r(dst.fp().V2D(), src_op);
}
}
}
void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Tbl(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V2D(), src.fp().D(), 0);
}
void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Mov(dst.fp().D(), lhs.fp().V2D(), imm_lane_idx);
}
void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
if (dst != src1) {
Mov(dst.fp().V2D(), src1.fp().V2D());
}
Mov(dst.fp().V2D(), imm_lane_idx, src2.fp().V2D(), 0);
}
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
Fabs(dst.fp().V2D(), src.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
Fneg(dst.fp().V2D(), src.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
LiftoffRegister src) {
Fsqrt(dst.fp().V2D(), src.fp().V2D());
}
bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
LiftoffRegister src) {
Frintp(dst.fp().V2D(), src.fp().V2D());
return true;
}
bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
LiftoffRegister src) {
Frintm(dst.fp().V2D(), src.fp().V2D());
return true;
}
bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
LiftoffRegister src) {
Frintz(dst.fp().V2D(), src.fp().V2D());
return true;
}
bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
LiftoffRegister src) {
Frintn(dst.fp().V2D(), src.fp().V2D());
return true;
}
void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fadd(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fsub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fmul(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fdiv(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fmin(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fmax(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = dst.fp();
if (dst == lhs || dst == rhs) {
tmp = temps.AcquireV(kFormat2D);
}
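  // Pseudo-min: pmin(a, b) = (b < a) ? b : a. Fcmgt builds an all-ones lane
  // mask where lhs > rhs, and Bsl uses that mask to select rhs there and lhs
  // elsewhere; NaN handling thus follows the compare, unlike Fmin.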
Fcmgt(tmp.V2D(), lhs.fp().V2D(), rhs.fp().V2D());
Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
if (dst == lhs || dst == rhs) {
Mov(dst.fp().V2D(), tmp);
}
}
void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = dst.fp();
if (dst == lhs || dst == rhs) {
tmp = temps.AcquireV(kFormat2D);
}
Fcmgt(tmp.V2D(), rhs.fp().V2D(), lhs.fp().V2D());
Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
if (dst == lhs || dst == rhs) {
Mov(dst.fp().V2D(), tmp);
}
}
void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.fp().S(), 0);
}
void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Mov(dst.fp().S(), lhs.fp().V4S(), imm_lane_idx);
}
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
if (dst != src1) {
Mov(dst.fp().V4S(), src1.fp().V4S());
}
Mov(dst.fp().V4S(), imm_lane_idx, src2.fp().V4S(), 0);
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
Fabs(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
Fneg(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
LiftoffRegister src) {
Fsqrt(dst.fp().V4S(), src.fp().V4S());
}
bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
LiftoffRegister src) {
Frintp(dst.fp().V4S(), src.fp().V4S());
return true;
}
bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
LiftoffRegister src) {
Frintm(dst.fp().V4S(), src.fp().V4S());
return true;
}
bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
LiftoffRegister src) {
Frintz(dst.fp().V4S(), src.fp().V4S());
return true;
}
bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
LiftoffRegister src) {
Frintn(dst.fp().V4S(), src.fp().V4S());
return true;
}
void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fadd(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fsub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fmul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fdiv(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fmin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fmax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = dst.fp();
if (dst == lhs || dst == rhs) {
tmp = temps.AcquireV(kFormat4S);
}
Fcmgt(tmp.V4S(), lhs.fp().V4S(), rhs.fp().V4S());
Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
if (dst == lhs || dst == rhs) {
Mov(dst.fp().V4S(), tmp);
}
}
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = dst.fp();
if (dst == lhs || dst == rhs) {
tmp = temps.AcquireV(kFormat4S);
}
Fcmgt(tmp.V4S(), rhs.fp().V4S(), lhs.fp().V4S());
Bsl(tmp.V16B(), rhs.fp().V16B(), lhs.fp().V16B());
if (dst == lhs || dst == rhs) {
Mov(dst.fp().V4S(), tmp);
}
}
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V2D(), src.gp().X());
}
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Mov(dst.gp().X(), lhs.fp().V2D(), imm_lane_idx);
}
void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
if (dst != src1) {
Mov(dst.fp().V2D(), src1.fp().V2D());
}
Mov(dst.fp().V2D(), imm_lane_idx, src2.gp().X());
}
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
Neg(dst.fp().V2D(), src.fp().V2D());
}
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
Shl(dst.fp().V2D(), lhs.fp().V2D(), rhs & 63);
}
void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kSigned>(
this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
}
void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat2D, liftoff::ShiftSign::kSigned>(
this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
}
void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V2D(), lhs.fp().V2D(), rhs.gp(), kFormat2D);
}
void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat2D,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V2D(), lhs.fp().V2D(), rhs);
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Sub(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp1 = temps.AcquireV(kFormat2D);
VRegister tmp2 = temps.AcquireV(kFormat2D);
// Algorithm copied from code-generator-arm64.cc with minor modifications:
// - 2 (max number of scratch registers in Liftoff) temporaries instead of 3
// - 1 more Umull instruction to calculate | cg | ae |,
// - so, we can no longer use Umlal in the last step, and use Add instead.
// Refer to comments there for details.
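  // Writing each 64-bit lane of lhs as a_hi:a_lo and of rhs as b_hi:b_lo, the
  // low 64 bits of the product are
  //   a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 32).
  // Umull computes the first term, Rev64/Mul/Addp the cross terms, and
  // Shll/Add combine them.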
Xtn(tmp1.V2S(), lhs.fp().V2D());
Xtn(tmp2.V2S(), rhs.fp().V2D());
Umull(tmp1.V2D(), tmp1.V2S(), tmp2.V2S());
Rev64(tmp2.V4S(), rhs.fp().V4S());
Mul(tmp2.V4S(), tmp2.V4S(), lhs.fp().V4S());
Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
Shll(dst.fp().V2D(), tmp2.V2S(), 32);
Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.gp().W());
}
void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Mov(dst.gp().W(), lhs.fp().V4S(), imm_lane_idx);
}
void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
if (dst != src1) {
Mov(dst.fp().V4S(), src1.fp().V4S());
}
Mov(dst.fp().V4S(), imm_lane_idx, src2.gp().W());
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
Neg(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst,
LiftoffRegister src) {
liftoff::EmitAnyTrue(this, dst, src);
}
void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
LiftoffRegister src) {
liftoff::EmitAllTrue(this, dst, src, kFormat4S);
}
void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
LiftoffRegister src) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireQ();
VRegister mask = temps.AcquireQ();
Sshr(tmp.V4S(), src.fp().V4S(), 31);
  // The mask holds 1 << i in lane i. ANDing with tmp (all ones for negative
  // lanes, zero otherwise) leaves 1 << i exactly for the lanes whose sign bit
  // was set; Addv then sums these disjoint bits into the bitmask.
Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
And(tmp.V16B(), mask.V16B(), tmp.V16B());
Addv(tmp.S(), tmp.V4S());
Mov(dst.gp().W(), tmp.V4S(), 0);
}
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
Shl(dst.fp().V4S(), lhs.fp().V4S(), rhs & 31);
}
void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kSigned>(
this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
}
void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat4S, liftoff::ShiftSign::kSigned>(
this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
}
void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V4S(), lhs.fp().V4S(), rhs.gp(), kFormat4S);
}
void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat4S,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V4S(), lhs.fp().V4S(), rhs);
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Sub(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Mul(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Smin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Umin(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Smax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Umax(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope scope(this);
VRegister tmp1 = scope.AcquireV(kFormat4S);
VRegister tmp2 = scope.AcquireV(kFormat4S);
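// Widening-multiply the low and high halves of the i16x8 inputs into two
// i32x4 vectors of full products, then pairwise-add adjacent products to form
// the four dot-product lanes.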
Smull(tmp1, lhs.fp().V4H(), rhs.fp().V4H());
Smull2(tmp2, lhs.fp().V8H(), rhs.fp().V8H());
Addp(dst.fp().V4S(), tmp1, tmp2);
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V8H(), src.gp().W());
}
void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Umov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}
void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Smov(dst.gp().W(), lhs.fp().V8H(), imm_lane_idx);
}
void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
if (dst != src1) {
Mov(dst.fp().V8H(), src1.fp().V8H());
}
Mov(dst.fp().V8H(), imm_lane_idx, src2.gp().W());
}
void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
LiftoffRegister src) {
Neg(dst.fp().V8H(), src.fp().V8H());
}
void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst,
LiftoffRegister src) {
liftoff::EmitAnyTrue(this, dst, src);
}
void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
LiftoffRegister src) {
liftoff::EmitAllTrue(this, dst, src, kFormat8H);
}
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
LiftoffRegister src) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireQ();
VRegister mask = temps.AcquireQ();
Sshr(tmp.V8H(), src.fp().V8H(), 15);
// Set bit i in lane i of the mask. ANDing the mask with tmp (all-ones for
// negative lanes, all-zeros otherwise) keeps bit i only for the lanes whose
// sign bit was set.
Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
And(tmp.V16B(), mask.V16B(), tmp.V16B());
Addv(tmp.H(), tmp.V8H());
Mov(dst.gp().W(), tmp.V8H(), 0);
}
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
Shl(dst.fp().V8H(), lhs.fp().V8H(), rhs & 15);
}
void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kSigned>(
this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
}
void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat8H, liftoff::ShiftSign::kSigned>(
this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
}
void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V8H(), lhs.fp().V8H(), rhs.gp(), kFormat8H);
}
void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat8H,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V8H(), lhs.fp().V8H(), rhs);
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Sqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Sub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Sqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Uqsub(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Mul(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Uqadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Smin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Umin(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Smax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
const uint8_t shuffle[16],
bool is_swizzle) {
VRegister src1 = lhs.fp();
VRegister src2 = rhs.fp();
VRegister temp = dst.fp();
if (dst == lhs || dst == rhs) {
// dst overlaps with lhs or rhs, so we need a temporary.
temp = GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp();
}
UseScratchRegisterScope scope(this);
if (src1 != src2 && !AreConsecutive(src1, src2)) {
// Tbl needs consecutive registers, which our scratch registers are.
src1 = scope.AcquireV(kFormat16B);
src2 = scope.AcquireV(kFormat16B);
DCHECK(AreConsecutive(src1, src2));
Mov(src1.Q(), lhs.fp().Q());
Mov(src2.Q(), rhs.fp().Q());
}
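// Pack the 16 shuffle indices into two 64-bit immediates (little-endian
// within each half) so the whole index vector can be materialized with a
// single Movi.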
int64_t imms[2] = {0, 0};
for (int i = 7; i >= 0; i--) {
imms[0] = (imms[0] << 8) | (shuffle[i]);
imms[1] = (imms[1] << 8) | (shuffle[i + 8]);
}
DCHECK_EQ(0, (imms[0] | imms[1]) &
(lhs == rhs ? 0xF0F0F0F0F0F0F0F0 : 0xE0E0E0E0E0E0E0E0));
Movi(temp.V16B(), imms[1], imms[0]);
if (src1 == src2) {
Tbl(dst.fp().V16B(), src1.V16B(), temp.V16B());
} else {
Tbl(dst.fp().V16B(), src1.V16B(), src2.V16B(), temp.V16B());
}
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V16B(), src.gp().W());
}
void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Umov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
LiftoffRegister lhs,
uint8_t imm_lane_idx) {
Smov(dst.gp().W(), lhs.fp().V16B(), imm_lane_idx);
}
void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
uint8_t imm_lane_idx) {
if (dst != src1) {
Mov(dst.fp().V16B(), src1.fp().V16B());
}
Mov(dst.fp().V16B(), imm_lane_idx, src2.gp().W());
}
void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
LiftoffRegister src) {
Neg(dst.fp().V16B(), src.fp().V16B());
}
void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst,
LiftoffRegister src) {
liftoff::EmitAnyTrue(this, dst, src);
}
void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
LiftoffRegister src) {
liftoff::EmitAllTrue(this, dst, src, kFormat16B);
}
void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
LiftoffRegister src) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireQ();
VRegister mask = temps.AcquireQ();
Sshr(tmp.V16B(), src.fp().V16B(), 7);
// Set bit (i mod 8) in lane i of the mask. ANDing the mask with tmp (all-ones
// for negative lanes, all-zeros otherwise) keeps that bit only for the lanes
// whose sign bit was set.
Movi(mask.V2D(), 0x8040'2010'0804'0201);
And(tmp.V16B(), mask.V16B(), tmp.V16B());
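// tmp now holds one mask bit per byte lane, but the upper eight lanes reuse
// bit positions 0-7. Interleave the two 8-byte halves into halfword lanes so
// the upper half's bits land in bit positions 8-15, then sum the halfwords to
// produce the 16-bit bitmask.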
Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
Addv(tmp.H(), tmp.V8H());
Mov(dst.gp().W(), tmp.V8H(), 0);
}
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kLeft>(
this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
}
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
Shl(dst.fp().V16B(), lhs.fp().V16B(), rhs & 7);
}
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kSigned>(
this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
}
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat16B, liftoff::ShiftSign::kSigned>(
this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
}
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShift<liftoff::ShiftDirection::kRight,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V16B(), lhs.fp().V16B(), rhs.gp(), kFormat16B);
}
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
liftoff::EmitSimdShiftRightImmediate<kFormat16B,
liftoff::ShiftSign::kUnsigned>(
this, dst.fp().V16B(), lhs.fp().V16B(), rhs);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Sqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Sub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Sqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Uqsub(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Mul(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Uqadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Smin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Umin(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Smax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Umax(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmeq(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
Mvn(dst.fp().V16B(), dst.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmgt(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmhi(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmge(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmhs(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmeq(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
Mvn(dst.fp().V8H(), dst.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmgt(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmhi(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmge(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmhs(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
Mvn(dst.fp().V4S(), dst.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmgt(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmhi(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmge(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Cmhs(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
Mvn(dst.fp().V4S(), dst.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
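// lhs < rhs is computed as rhs > lhs, with the operands swapped.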
Fcmgt(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmge(dst.fp().V4S(), rhs.fp().V4S(), lhs.fp().V4S());
}
void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmeq(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
Mvn(dst.fp().V2D(), dst.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmgt(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
}
void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Fcmge(dst.fp().V2D(), rhs.fp().V2D(), lhs.fp().V2D());
}
void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
const uint8_t imms[16]) {
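// Interpret the 16 immediate bytes as two little-endian 64-bit halves and
// materialize them with a single Movi.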
uint64_t vals[2];
memcpy(vals, imms, sizeof(vals));
Movi(dst.fp().V16B(), vals[1], vals[0]);
}
void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
Mvn(dst.fp().V16B(), src.fp().V16B());
}
void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
And(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Orr(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Eor(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister mask) {
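// Bsl selects bits from the first operand where the corresponding bit of dst
// is set and from the second operand otherwise, so dst must hold the mask
// before the instruction executes.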
if (dst != mask) {
Mov(dst.fp().V16B(), mask.fp().V16B());
}
Bsl(dst.fp().V16B(), src1.fp().V16B(), src2.fp().V16B());
}
void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
Fcvtzs(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
LiftoffRegister src) {
Fcvtzu(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
LiftoffRegister src) {
Scvtf(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
LiftoffRegister src) {
Ucvtf(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireV(kFormat8H);
VRegister right = rhs.fp().V8H();
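// If dst aliases rhs, the first narrowing below would clobber rhs before it
// is consumed, so narrow from a copy of rhs instead.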
if (dst == rhs) {
Mov(tmp, right);
right = tmp;
}
Sqxtn(dst.fp().V8B(), lhs.fp().V8H());
Sqxtn2(dst.fp().V16B(), right);
}
void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireV(kFormat8H);
VRegister right = rhs.fp().V8H();
if (dst == rhs) {
Mov(tmp, right);
right = tmp;
}
Sqxtun(dst.fp().V8B(), lhs.fp().V8H());
Sqxtun2(dst.fp().V16B(), right);
}
void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireV(kFormat4S);
VRegister right = rhs.fp().V4S();
if (dst == rhs) {
Mov(tmp, right);
right = tmp;
}
Sqxtn(dst.fp().V4H(), lhs.fp().V4S());
Sqxtn2(dst.fp().V8H(), right);
}
void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
UseScratchRegisterScope temps(this);
VRegister tmp = temps.AcquireV(kFormat4S);
VRegister right = rhs.fp().V4S();
if (dst == rhs) {
Mov(tmp, right);
right = tmp;
}
Sqxtun(dst.fp().V4H(), lhs.fp().V4S());
Sqxtun2(dst.fp().V8H(), right);
}
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
LiftoffRegister src) {
Sxtl(dst.fp().V8H(), src.fp().V8B());
}
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
LiftoffRegister src) {
Sxtl2(dst.fp().V8H(), src.fp().V16B());
}
void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
LiftoffRegister src) {
Uxtl(dst.fp().V8H(), src.fp().V8B());
}
void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
LiftoffRegister src) {
Uxtl2(dst.fp().V8H(), src.fp().V16B());
}
void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
LiftoffRegister src) {
Sxtl(dst.fp().V4S(), src.fp().V4H());
}
void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
LiftoffRegister src) {
Sxtl2(dst.fp().V4S(), src.fp().V8H());
}
void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
LiftoffRegister src) {
Uxtl(dst.fp().V4S(), src.fp().V4H());
}
void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
LiftoffRegister src) {
Uxtl2(dst.fp().V4S(), src.fp().V8H());
}
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Bic(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Urhadd(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
}
void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
Urhadd(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V16B(), src.fp().V16B());
}
void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V8H(), src.fp().V8H());
}
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
Ldr(limit_address, MemOperand(limit_address));
Cmp(sp, limit_address);
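// Branch to the out-of-line code if sp is below or equal to the stack limit.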
B(ool_code, ls);
}
void LiftoffAssembler::CallTrapCallbackForTesting() {
CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}
void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
TurboAssembler::AssertUnreachable(reason);
}
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
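// Pad each list to an even number of registers so that sp stays 16-byte
// aligned (see liftoff::PadRegList / PadVRegList).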
PushCPURegList(liftoff::PadRegList(regs.GetGpList()));
PushCPURegList(liftoff::PadVRegList(regs.GetFpList()));
}
void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
PopCPURegList(liftoff::PadVRegList(regs.GetFpList()));
PopCPURegList(liftoff::PadRegList(regs.GetGpList()));
}
void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
DropSlots(num_stack_slots);
Ret();
}
void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
const LiftoffRegister* args,
const LiftoffRegister* rets,
ValueType out_argument_type, int stack_bytes,
ExternalReference ext_ref) {
// The stack pointer is required to be quadword aligned.
int total_size = RoundUp(stack_bytes, kQuadWordSizeInBytes);
// Reserve space in the stack.
Claim(total_size, 1);
int arg_bytes = 0;
for (ValueType param_type : sig->parameters()) {
Poke(liftoff::GetRegFromType(*args++, param_type), arg_bytes);
arg_bytes += param_type.element_size_bytes();
}
DCHECK_LE(arg_bytes, stack_bytes);
// Pass a pointer to the buffer with the arguments to the C function.
Mov(x0, sp);
// Now call the C function.
constexpr int kNumCCallArgs = 1;
CallCFunction(ext_ref, kNumCCallArgs);
// Move return value to the right register.
const LiftoffRegister* next_result_reg = rets;
if (sig->return_count() > 0) {
DCHECK_EQ(1, sig->return_count());
constexpr Register kReturnReg = x0;
if (kReturnReg != next_result_reg->gp()) {
Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
}
++next_result_reg;
}
// Load potential output value from the buffer on the stack.
if (out_argument_type != kWasmStmt) {
Peek(liftoff::GetRegFromType(*next_result_reg, out_argument_type), 0);
}
Drop(total_size, 1);
}
void LiftoffAssembler::CallNativeWasmCode(Address addr) {
Call(addr, RelocInfo::WASM_CALL);
}
void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
Jump(addr, RelocInfo::WASM_CALL);
}
void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig,
compiler::CallDescriptor* call_descriptor,
Register target) {
// For Arm64, we have more cache registers than wasm parameters. That means
// that target will always be in a register.
DCHECK(target.is_valid());
Call(target);
}
void LiftoffAssembler::TailCallIndirect(Register target) {
DCHECK(target.is_valid());
// When control flow integrity is enabled, the instruction at the jump target
// is a "bti c", which only accepts indirect branches performed by a "blr", or
// by a "br" whose destination register is x16 or x17.
UseScratchRegisterScope temps(this);
temps.Exclude(x17);
Mov(x17, target);
Jump(x17);
}
void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
// A direct call to a wasm runtime stub defined in this module.
// Just encode the stub index. This will be patched at relocation.
Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}
void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
// The stack pointer is required to be quadword aligned.
size = RoundUp(size, kQuadWordSizeInBytes);
Claim(size, 1);
Mov(addr, sp);
}
void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
// The stack pointer is required to be quadword aligned.
size = RoundUp(size, kQuadWordSizeInBytes);
Drop(size, 1);
}
void LiftoffStackSlots::Construct() {
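// Count the stack slots needed in units of kXRegSize; an s128 value occupies
// two of them.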
size_t num_slots = 0;
for (auto& slot : slots_) {
num_slots += slot.src_.type() == kWasmS128 ? 2 : 1;
}
// The stack pointer is required to be quadword aligned.
asm_->Claim(RoundUp(num_slots, 2));
size_t poke_offset = num_slots * kXRegSize;
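// Fill the slots from the highest stack offset downwards.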
for (auto& slot : slots_) {
poke_offset -= slot.src_.type() == kWasmS128 ? kXRegSize * 2 : kXRegSize;
switch (slot.src_.loc()) {
case LiftoffAssembler::VarState::kStack: {
UseScratchRegisterScope temps(asm_);
CPURegister scratch = liftoff::AcquireByType(&temps, slot.src_.type());
asm_->Ldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
asm_->Poke(scratch, poke_offset);
break;
}
case LiftoffAssembler::VarState::kRegister:
asm_->Poke(liftoff::GetRegFromType(slot.src_.reg(), slot.src_.type()),
poke_offset);
break;
case LiftoffAssembler::VarState::kIntConst:
DCHECK(slot.src_.type() == kWasmI32 || slot.src_.type() == kWasmI64);
if (slot.src_.i32_const() == 0) {
Register zero_reg = slot.src_.type() == kWasmI32 ? wzr : xzr;
asm_->Poke(zero_reg, poke_offset);
} else {
UseScratchRegisterScope temps(asm_);
Register scratch = slot.src_.type() == kWasmI32 ? temps.AcquireW()
: temps.AcquireX();
asm_->Mov(scratch, int64_t{slot.src_.i32_const()});
asm_->Poke(scratch, poke_offset);
}
break;
}
}
}
} // namespace wasm
} // namespace internal
} // namespace v8
#endif // V8_WASM_BASELINE_ARM64_LIFTOFF_ASSEMBLER_ARM64_H_