/*
* Copyright (C) 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
#define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
#include <array>
#include <cstdint>
#include <type_traits>
#include "berberis/assembler/x86_64.h"
#include "berberis/base/bit_util.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/runtime_primitives/platform.h"
namespace berberis::call_intrinsic {
constexpr x86_64::Assembler::Register kCallerSavedRegs[] = {
x86_64::Assembler::rax,
x86_64::Assembler::rcx,
x86_64::Assembler::rdx,
x86_64::Assembler::rdi,
x86_64::Assembler::rsi,
x86_64::Assembler::r8,
x86_64::Assembler::r9,
x86_64::Assembler::r10,
x86_64::Assembler::r11,
};
constexpr int8_t kRegIsNotOnStack = -1;
// Map from register number to offset in the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr auto kRegOffsetsOnStack = []() {
std::array<int8_t, 16> regs_on_stack = {};
  // Can't use regs_on_stack.fill(kRegIsNotOnStack): constexpr std::array::fill needs C++20.
for (auto& num : regs_on_stack) {
num = kRegIsNotOnStack;
}
int8_t stack_allocation_size = 0;
for (auto reg : kCallerSavedRegs) {
regs_on_stack[reg.num] = stack_allocation_size;
++stack_allocation_size;
}
return regs_on_stack;
}();
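// E.g., assuming the assembler uses the hardware register numbers (rax = 0, rcx = 1, rdx = 2,
// rsi = 6, rdi = 7, r8..r11 = 8..11), the map above reads: rax -> slot 0, rcx -> slot 1,
// rdx -> slot 2, rdi -> slot 3, rsi -> slot 4, r8..r11 -> slots 5..8; every other entry stays
// kRegIsNotOnStack.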
constexpr x86_64::Assembler::XMMRegister kCallerSavedXMMRegs[] = {
x86_64::Assembler::xmm0,
x86_64::Assembler::xmm1,
x86_64::Assembler::xmm2,
x86_64::Assembler::xmm3,
x86_64::Assembler::xmm4,
x86_64::Assembler::xmm5,
x86_64::Assembler::xmm6,
x86_64::Assembler::xmm7,
x86_64::Assembler::xmm8,
x86_64::Assembler::xmm9,
x86_64::Assembler::xmm10,
x86_64::Assembler::xmm11,
x86_64::Assembler::xmm12,
x86_64::Assembler::xmm13,
x86_64::Assembler::xmm14,
x86_64::Assembler::xmm15,
};
// Map from register number to offset in the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr auto kSimdRegOffsetsOnStack = []() {
std::array<int8_t, 16> simd_regs_on_stack = {};
  // Can't use simd_regs_on_stack.fill(kRegIsNotOnStack): constexpr std::array::fill needs C++20.
for (auto& num : simd_regs_on_stack) {
num = kRegIsNotOnStack;
}
int8_t stack_allocation_size = AlignUp(arraysize(kCallerSavedRegs), 2);
for (auto reg : kCallerSavedXMMRegs) {
simd_regs_on_stack[reg.num] = stack_allocation_size;
stack_allocation_size += 2;
}
return simd_regs_on_stack;
}();
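// The GP save slots occupy slots 0..8 and are padded to 10 slots, so xmm0 lands at slot 10
// (byte offset 80), xmm1 at slot 12, ..., xmm15 at slot 40. Each XMM register takes two 8-byte
// slots, and the resulting 16-byte offsets keep the Movdqa accesses below aligned (assuming RSP
// itself is 16-byte aligned when the save area is allocated).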
// Size of the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr int8_t kSaveAreaSize =
AlignUp(arraysize(kCallerSavedRegs), 2) + arraysize(kCallerSavedXMMRegs) * 2;
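// That is AlignUp(9, 2) + 16 * 2 = 10 + 32 = 42 slots, or 336 bytes.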
struct StoredRegsInfo {
std::decay_t<decltype(kRegOffsetsOnStack)> regs_on_stack;
std::decay_t<decltype(kSimdRegOffsetsOnStack)> simd_regs_on_stack;
};
inline void PushCallerSaved(MacroAssembler<x86_64::Assembler>& as) {
as.Subq(as.rsp, kSaveAreaSize * 8);
for (auto reg : kCallerSavedRegs) {
as.Movq({.base = as.rsp, .disp = kRegOffsetsOnStack[reg.num] * 8}, reg);
}
for (auto reg : kCallerSavedXMMRegs) {
as.Movdqa({.base = as.rsp, .disp = kSimdRegOffsetsOnStack[reg.num] * 8}, reg);
}
}
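// PushCallerSaved emits roughly the following (a sketch; displacements follow the offset maps
// above):
//
//   subq   $336, %rsp
//   movq   %rax, 0(%rsp)
//   movq   %rcx, 8(%rsp)
//   ...
//   movq   %r11, 64(%rsp)
//   movdqa %xmm0, 80(%rsp)
//   ...
//   movdqa %xmm15, 320(%rsp)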
// Note: regs_on_stack is usually a copy of kRegOffsetsOnStack with some registers marked off as
// kRegIsNotOnStack, and simd_regs_on_stack is kSimdRegOffsetsOnStack with some registers marked
// as kRegIsNotOnStack. These registers are skipped during the restoration process.
inline void PopCallerSaved(MacroAssembler<x86_64::Assembler>& as, const StoredRegsInfo regs_info) {
for (auto reg : kCallerSavedRegs) {
if (regs_info.regs_on_stack[reg.num] != kRegIsNotOnStack) {
as.Movq(reg, {.base = as.rsp, .disp = regs_info.regs_on_stack[reg.num] * 8});
}
}
for (auto reg : kCallerSavedXMMRegs) {
if (regs_info.simd_regs_on_stack[reg.num] != kRegIsNotOnStack) {
as.Movdqa(reg, {.base = as.rsp, .disp = regs_info.simd_regs_on_stack[reg.num] * 8});
}
}
as.Addq(as.rsp, kSaveAreaSize * 8);
}
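// For example, when ForwardResults below moves an intrinsic's rax result into the caller's
// destination register, it sets that register's entry in regs_info to kRegIsNotOnStack so that
// PopCallerSaved doesn't clobber the freshly written result.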
// Nonfunctional assembler used by a static_assert expression. It doesn't do anything, but it
// allows us to call InitArgs at compile time with the same argument types as would be used
// during execution.
//
// This turns a runtime check into a compile-time check and thus allows us to catch weird corner
// cases earlier.
class ConstExprCheckAssembler {
public:
using Operand = MacroAssembler<x86_64::Assembler>::Operand;
using Register = MacroAssembler<x86_64::Assembler>::Register;
using XMMRegister = MacroAssembler<x86_64::Assembler>::XMMRegister;
static constexpr auto rsp = MacroAssembler<x86_64::Assembler>::rsp;
constexpr ConstExprCheckAssembler() = default;
template <typename U, typename V>
constexpr void Expand(Register, Operand) const {}
template <typename U, typename V>
constexpr void Expand(Register, Register) const {}
template <typename U>
constexpr void Mov(Operand, Register) const {}
template <typename U>
constexpr void Mov(Register, Operand) const {}
template <typename U>
constexpr void Mov(Register, Register) const {}
constexpr void Movl(Register, int32_t) const {}
template <typename U>
constexpr void Movs(Operand, XMMRegister) const {}
template <typename U>
constexpr void Movs(XMMRegister, Operand) const {}
template <typename U>
constexpr void Movs(XMMRegister, XMMRegister) const {}
template <typename U>
constexpr void Vmovs(Operand, XMMRegister) const {}
template <typename U>
constexpr void Vmovs(XMMRegister, Operand) const {}
template <typename U>
constexpr void Vmovs(XMMRegister, XMMRegister, XMMRegister) const {}
};
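// ConstExprCheckAssembler is what makes the static_asserts in CallIntrinsic below possible:
//
//   static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
//       ConstExprCheckAssembler(), true, AssemblerArgType{0}...));
//
// An unsupported argument combination thus fails to compile instead of failing at runtime.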
// Helper wrapper to pass the intrinsic type down to the generic lambda.
template <typename T, typename U>
struct ArgWrap {
typedef T AssemblerType;
typedef U IntrinsicType;
AssemblerType value;
};
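// E.g. ArgWrap<Register, uint32_t>{.value = reg} carries the assembler-side value together with
// the intrinsic-side type through the fold expression in InitArgs below.
// Per the x86-64 System V calling convention, the first six integer arguments travel in
// rdi, rsi, rdx, rcx, r8, r9 and the first eight floating-point arguments in xmm0-xmm7: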
static constexpr x86_64::Assembler::Register kAbiArgs[] = {
x86_64::Assembler::rdi,
x86_64::Assembler::rsi,
x86_64::Assembler::rdx,
x86_64::Assembler::rcx,
x86_64::Assembler::r8,
x86_64::Assembler::r9,
};
static constexpr x86_64::Assembler::XMMRegister kAbiSimdArgs[] = {
x86_64::Assembler::xmm0,
x86_64::Assembler::xmm1,
x86_64::Assembler::xmm2,
x86_64::Assembler::xmm3,
x86_64::Assembler::xmm4,
x86_64::Assembler::xmm5,
x86_64::Assembler::xmm6,
x86_64::Assembler::xmm7,
};
// Assumes RSP points to the preallocated stack save area (see PushCallerSaved above).
template <typename IntrinsicResType,
typename... IntrinsicArgType,
typename MacroAssembler,
typename... AssemblerArgType>
constexpr bool InitArgs(MacroAssembler&& as, bool has_avx, AssemblerArgType... args) {
using Assembler = std::decay_t<MacroAssembler>;
using Register = typename Assembler::Register;
using XMMRegister = typename Assembler::XMMRegister;
using Float32 = intrinsics::Float32;
using Float64 = intrinsics::Float64;
  // All ABI argument registers are saved among the caller-saved registers, so we can safely
  // initialize them now. When an intrinsic receives its argument from such a register we'll read
  // it from the stack, so there is no early-clobbering problem. Callee-saved regs are never ABI
  // arguments, so we can move them to an ABI reg directly.
size_t gp_index = 0;
size_t simd_index = 0;
bool success = ([&as, &gp_index, &simd_index, has_avx](auto arg) -> bool {
using AssemblerType = typename decltype(arg)::AssemblerType;
using IntrinsicType = typename decltype(arg)::IntrinsicType;
    if constexpr (std::is_integral_v<IntrinsicType>) {
if (gp_index == arraysize(kAbiArgs)) {
return false;
}
} else if constexpr (std::is_same_v<IntrinsicType, Float32> ||
std::is_same_v<IntrinsicType, Float64>) {
if (simd_index == arraysize(kAbiSimdArgs)) {
return false;
}
} else {
return false;
}
    // Note: the ABI mandates extension up to 32 bits and zero-filling of the upper half.
if constexpr (std::is_integral_v<IntrinsicType> && sizeof(IntrinsicType) <= sizeof(int32_t) &&
std::is_integral_v<AssemblerType> && sizeof(AssemblerType) <= sizeof(int32_t)) {
as.Movl(kAbiArgs[gp_index++], static_cast<int32_t>(arg.value));
} else if constexpr (std::is_integral_v<IntrinsicType> &&
sizeof(IntrinsicType) == sizeof(int64_t) &&
std::is_integral_v<AssemblerType> &&
sizeof(AssemblerType) == sizeof(int64_t)) {
as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++],
static_cast<int64_t>(arg.value));
} else if constexpr (std::is_integral_v<IntrinsicType> &&
sizeof(IntrinsicType) <= sizeof(int32_t) &&
std::is_same_v<AssemblerType, Register>) {
if (kRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
as.template Expand<int32_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
} else {
as.template Expand<int32_t, IntrinsicType>(
kAbiArgs[gp_index++],
{.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.num] * 8});
}
} else if constexpr (std::is_integral_v<IntrinsicType> &&
sizeof(IntrinsicType) == sizeof(int64_t) &&
std::is_same_v<AssemblerType, Register>) {
if (kRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
} else {
as.template Expand<int64_t, IntrinsicType>(
kAbiArgs[gp_index++],
{.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.num] * 8});
}
    } else if constexpr ((std::is_same_v<IntrinsicType, Float32> ||
                          std::is_same_v<IntrinsicType, Float64>) &&
                         std::is_same_v<AssemblerType, XMMRegister>) {
if (kSimdRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
if (has_avx) {
as.template Vmovs<IntrinsicType>(
kAbiSimdArgs[simd_index], kAbiSimdArgs[simd_index], arg.value);
simd_index++;
} else {
as.template Movs<IntrinsicType>(kAbiSimdArgs[simd_index++], arg.value);
}
} else {
if (has_avx) {
as.template Vmovs<IntrinsicType>(
kAbiSimdArgs[simd_index++],
{.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.num] * 8});
} else {
as.template Movs<IntrinsicType>(
kAbiSimdArgs[simd_index++],
{.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.num] * 8});
}
}
} else {
static_assert(kDependentTypeFalse<std::tuple<IntrinsicType, AssemblerType>>,
"Unknown parameter type, please add support to CallIntrinsic");
}
return true;
}(ArgWrap<AssemblerArgType, IntrinsicArgType>{.value = args}) && ...);
return success;
}
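// For instance, for an intrinsic taking (uint64_t, Float32) with assembler-side arguments
// (Register src, XMMRegister fsrc), InitArgs emits roughly (a sketch, assuming both registers
// are caller-saved and AVX is available):
//
//   movq   <save slot of src>(%rsp), %rdi
//   vmovss <save slot of fsrc>(%rsp), %xmm0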
// Forward results from the ABI registers to the result-specified registers and mark those
// registers in the returned StoredRegsInfo with kRegIsNotOnStack to prevent their restoration
// from the stack.
template <typename IntrinsicResType, typename AssemblerResType>
StoredRegsInfo ForwardResults(MacroAssembler<x86_64::Assembler>& as, AssemblerResType result) {
using Assembler = MacroAssembler<x86_64::Assembler>;
using Register = Assembler::Register;
using XMMRegister = Assembler::XMMRegister;
using Float32 = intrinsics::Float32;
using Float64 = intrinsics::Float64;
StoredRegsInfo regs_info = {.regs_on_stack = kRegOffsetsOnStack,
.simd_regs_on_stack = kSimdRegOffsetsOnStack};
if constexpr (Assembler::FormatIs<IntrinsicResType, std::tuple<int32_t>, std::tuple<uint32_t>> &&
std::is_same_v<AssemblerResType, Register>) {
    // Note: even unsigned 32-bit results are sign-extended to a 64-bit register on RV64.
regs_info.regs_on_stack[result.num] = kRegIsNotOnStack;
as.Expand<int64_t, int32_t>(result, Assembler::rax);
} else if constexpr (Assembler::
FormatIs<IntrinsicResType, std::tuple<int64_t>, std::tuple<uint64_t>> &&
std::is_same_v<AssemblerResType, Register>) {
regs_info.regs_on_stack[result.num] = kRegIsNotOnStack;
as.Mov<int64_t>(result, Assembler::rax);
} else if constexpr (Assembler::
FormatIs<IntrinsicResType, std::tuple<Float32>, std::tuple<Float64>> &&
std::is_same_v<AssemblerResType, XMMRegister>) {
regs_info.simd_regs_on_stack[result.num] = kRegIsNotOnStack;
if (host_platform::kHasAVX) {
as.Vmovs<std::tuple_element_t<0, IntrinsicResType>>(result, result, Assembler::xmm0);
} else {
as.Movs<std::tuple_element_t<0, IntrinsicResType>>(result, Assembler::xmm0);
}
} else {
static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
"Unknown resullt type, please add support to CallIntrinsic");
}
return regs_info;
}
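// E.g. for IntrinsicResType = std::tuple<uint32_t> returned into a Register this emits a
// sign-extending 32-to-64-bit move of rax into the result register (matching RV64 semantics)
// and clears the result's save-area slot in regs_info so PopCallerSaved leaves it alone.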
template <typename AssemblerResType,
typename IntrinsicResType,
typename... IntrinsicArgType,
typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
IntrinsicResType (*function)(IntrinsicArgType...),
AssemblerResType result,
AssemblerArgType... args) {
PushCallerSaved(as);
  // Note: we can ignore the status of the actual InitArgs call because we know that InitArgs
  // would succeed if the calls in the static_asserts succeed.
  //
  // The AVX flag shouldn't change the outcome, but better safe than sorry.
static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
ConstExprCheckAssembler(), true, AssemblerArgType{0}...));
static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
ConstExprCheckAssembler(), false, AssemblerArgType{0}...));
InitArgs<IntrinsicResType, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);
as.Call(reinterpret_cast<void*>(function));
auto regs_info = ForwardResults<IntrinsicResType>(as, result);
PopCallerSaved(as, regs_info);
}
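// Example use (a sketch; AmoAdd is a hypothetical intrinsic returning std::tuple<uint64_t>):
//
//   std::tuple<uint64_t> AmoAdd(uint64_t arg1, uint64_t arg2);
//   ...
//   CallIntrinsic(as, AmoAdd, result_reg, arg1_reg, arg2_reg);
//
// This pushes the caller-saved registers, loads the two arguments from their save-area slots
// into rdi and rsi, calls AmoAdd, moves rax into result_reg, and restores the rest.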
} // namespace berberis::call_intrinsic
#endif // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_