Duplicate text assemblers
This CL was made just so that its child CL is easier to review.
Flag: EXEMPT NDK
Bug: b/394278175
Test: m berberis_all
Change-Id: I3f7a3175adcddce344339680b2dab73d6a1b0522
diff --git a/intrinsics/all_to_riscv64/include/berberis/intrinsics/all_to_riscv64/verifier_assembler_riscv.h b/intrinsics/all_to_riscv64/include/berberis/intrinsics/all_to_riscv64/verifier_assembler_riscv.h
new file mode 100644
index 0000000..7986a9a
--- /dev/null
+++ b/intrinsics/all_to_riscv64/include/berberis/intrinsics/all_to_riscv64/verifier_assembler_riscv.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_COMMON_TO_RISCV_TEXT_ASSEMBLER_COMMON_H_
+#define BERBERIS_INTRINSICS_COMMON_TO_RISCV_TEXT_ASSEMBLER_COMMON_H_
+
+#include <array>
+#include <cstdint>
+#include <cstdio>
+#include <deque>
+#include <string>
+
+#include "berberis/assembler/riscv.h"
+#include "berberis/base/checks.h"
+#include "berberis/base/config.h"
+#include "berberis/base/dependent_false.h"
+#include "berberis/intrinsics/all_to_riscv64/intrinsics_bindings.h"
+
+namespace berberis {
+
+namespace constants_pool {
+
+extern const intptr_t kBerberisMacroAssemblerConstantsRelocated;
+
+inline intptr_t GetOffset(intptr_t address) {
+ return address - constants_pool::kBerberisMacroAssemblerConstantsRelocated;
+}
+
+} // namespace constants_pool
+
+namespace riscv {
+
+#define BERBERIS_DEFINE_TO_FAS_ARGUMENT(Immediate) \
+ template <typename MacroAssembler> \
+ inline std::string ToGasArgument(Immediate immediate, MacroAssembler*) { \
+ return "$" + std::to_string(static_cast<int32_t>(immediate)); \
+ }
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(BImmediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(CsrImmediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(IImmediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(JImmediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(PImmediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(Shift32Immediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(Shift64Immediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(SImmediate)
+BERBERIS_DEFINE_TO_FAS_ARGUMENT(UImmediate)
+#undef BERBERIS_DEFINE_TO_FAS_ARGUMENT
+
+template <typename MacroAssembler>
+inline std::string ToGasArgument(Rounding rm, MacroAssembler*) {
+ switch (rm) {
+ case Rounding::kRne:
+ return "rne";
+ case Rounding::kRtz:
+ return "rtz";
+ case Rounding::kRdn:
+ return "rdn";
+ case Rounding::kRup:
+ return "ruo";
+ case Rounding::kRmm:
+ return "rmm";
+ case Rounding::kDyn:
+ return "dyn";
+ default:
+ LOG_ALWAYS_FATAL("Unsupported rounding mode %d", rm);
+ }
+}
+
+template <typename DerivedAssemblerType>
+class TextAssembler {
+ public:
+ using Condition = riscv::Condition;
+ using Csr = riscv::Csr;
+ using Rounding = riscv::Rounding;
+
+ struct Label {
+ size_t id;
+ bool bound = false;
+
+ template <typename MacroAssembler>
+ friend std::string ToGasArgument(const Label& label, MacroAssembler*) {
+ return std::to_string(label.id) + (label.bound ? "b" : "f");
+ }
+ };
+
+ template <typename RegisterType, typename ImmediateType>
+ struct Operand;
+
+ class Register {
+ public:
+ constexpr Register() : arg_no_(kNoRegister) {}
+ constexpr Register(int arg_no) : arg_no_(arg_no) {}
+ int arg_no() const {
+ CHECK_NE(arg_no_, kNoRegister);
+ return arg_no_;
+ }
+
+ friend bool operator==(const Register&, const Register&) = default;
+
+ static constexpr int kNoRegister = -1;
+ static constexpr int kStackPointer = -2;
+ // Used in Operand to deal with references to scratch area.
+ static constexpr int kScratchPointer = -3;
+ static constexpr int kZeroRegister = -4;
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const Register& reg, MacroAssembler*) {
+ if (reg.arg_no_ == kZeroRegister) {
+ return "zero";
+ }
+
+ return '%' + std::to_string(reg.arg_no());
+ }
+
+ private:
+ template <typename RegisterType, typename ImmediateType>
+ friend struct Operand;
+
+ // Register number created during creation of assembler call.
+ // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
+ //
+ // Default value (-1) means it's not assigned yet (thus couldn't be used).
+ int arg_no_;
+ };
+
+ class FpRegister {
+ public:
+ constexpr FpRegister() : arg_no_(kNoRegister) {}
+ constexpr FpRegister(int arg_no) : arg_no_(arg_no) {}
+ int arg_no() const {
+ CHECK_NE(arg_no_, kNoRegister);
+ return arg_no_;
+ }
+
+ friend bool operator==(const FpRegister&, const FpRegister&) = default;
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const FpRegister& reg, MacroAssembler*) {
+ return '%' + std::to_string(reg.arg_no());
+ }
+
+ private:
+ // Register number created during creation of assembler call.
+ // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
+ //
+ // Default value (-1) means it's not assigned yet (thus couldn't be used).
+ static constexpr int kNoRegister = -1;
+ int arg_no_;
+ };
+
+ template <typename RegisterType, typename ImmediateType>
+ struct Operand {
+ RegisterType base{0};
+ ImmediateType disp = 0;
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const Operand& op, MacroAssembler* as) {
+ std::string result{};
+ result = '(' + ToGasArgument(op.base, as) + ')';
+ int32_t disp = static_cast<int32_t>(op.disp);
+ if (disp) {
+ result = ToGasArgument(disp, as) + result;
+ }
+ return result;
+ }
+ };
+
+ using BImmediate = riscv::BImmediate;
+ using CsrImmediate = riscv::CsrImmediate;
+ using IImmediate = riscv::IImmediate;
+ using Immediate = riscv::Immediate;
+ using JImmediate = riscv::JImmediate;
+ using Shift32Immediate = riscv::Shift32Immediate;
+ using Shift64Immediate = riscv::Shift64Immediate;
+ using PImmediate = riscv::PImmediate;
+ using SImmediate = riscv::SImmediate;
+ using UImmediate = riscv::UImmediate;
+
+ TextAssembler(int indent, FILE* out) : indent_(indent), out_(out) {}
+
+ // Verify CPU vendor and SSE restrictions.
+ template <typename CPUIDRestriction>
+ void CheckCPUIDRestriction() {}
+
+ // Translate CPU restrictions into string.
+ template <typename CPUIDRestriction>
+ static constexpr const char* kCPUIDRestrictionString =
+ DerivedAssemblerType::template CPUIDRestrictionToString<CPUIDRestriction>();
+
+ // RISC-V doesn't have “a”, “b”, “c”, or “d” registers, but we need these to be able to compile
+ // the code generator.
+ template <char kConstraint>
+ class UnsupportedRegister {
+ public:
+ UnsupportedRegister operator=(Register) {
+ LOG_ALWAYS_FATAL("Registers of the class “%c” don't exist on RISC-V", kConstraint);
+ }
+ };
+ UnsupportedRegister<'a'> gpr_a;
+ UnsupportedRegister<'b'> gpr_b;
+ UnsupportedRegister<'c'> gpr_c;
+ UnsupportedRegister<'d'> gpr_d;
+ // Note: stack pointer is not reflected in list of arguments, intrinsics use
+ // it implicitly.
+ Register gpr_s{Register::kStackPointer};
+ // Used in Operand as pseudo-register to temporary operand.
+ Register gpr_scratch{Register::kScratchPointer};
+ // Intrinsics which use these constants receive it via additional parameter - and
+ // we need to know if it's needed or not.
+ Register gpr_macroassembler_constants{};
+ bool need_gpr_macroassembler_constants() const { return need_gpr_macroassembler_constants_; }
+
+ Register gpr_macroassembler_scratch{};
+ bool need_gpr_macroassembler_scratch() const { return need_gpr_macroassembler_scratch_; }
+ Register gpr_macroassembler_scratch2{};
+
+ Register zero{Register::kZeroRegister};
+
+ void Bind(Label* label) {
+ CHECK_EQ(label->bound, false);
+ fprintf(out_, "%*s\"%zd:\\n\"\n", indent_ + 2, "", label->id);
+ label->bound = true;
+ }
+
+ Label* MakeLabel() {
+ labels_allocated_.push_back({labels_allocated_.size()});
+ return &labels_allocated_.back();
+ }
+
+ template <typename... Args>
+ void Byte(Args... args) {
+ static_assert((std::is_same_v<Args, uint8_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu8, print_kwd ? print_kwd = false, ".byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ template <typename... Args>
+ void TwoByte(Args... args) {
+ static_assert((std::is_same_v<Args, uint16_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu16, print_kwd ? print_kwd = false, ".2byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ template <typename... Args>
+ void FourByte(Args... args) {
+ static_assert((std::is_same_v<Args, uint32_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu32, print_kwd ? print_kwd = false, ".4byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ template <typename... Args>
+ void EigthByte(Args... args) {
+ static_assert((std::is_same_v<Args, uint64_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu64, print_kwd ? print_kwd = false, ".8byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ void P2Align(uint32_t m) { fprintf(out_, "%*s\".p2align %u\\n\"\n", indent_ + 2, "", m); }
+
+// Instructions.
+#include "gen_text_assembler_common_riscv-inl.h" // NOLINT generated file
+
+ protected:
+ template <typename CPUIDRestriction>
+ static constexpr const char* CPUIDRestrictionToString() {
+ if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::NoCPUIDRestriction>) {
+ return nullptr;
+ } else {
+ static_assert(kDependentTypeFalse<CPUIDRestriction>);
+ }
+ }
+
+ bool need_gpr_macroassembler_constants_ = false;
+ bool need_gpr_macroassembler_scratch_ = false;
+
+ template <typename... Args>
+ void Instruction(const char* name, Condition cond, const Args&... args);
+
+ template <typename... Args>
+ void Instruction(const char* name, const Args&... args);
+
+ void EmitString() {}
+
+ void EmitString(const std::string& s) { fprintf(out_, "%s", s.c_str()); }
+
+ template <typename... Args>
+ void EmitString(const std::string& s, const Args&... args) {
+ fprintf(out_, "%s, ", s.c_str());
+ EmitString(args...);
+ }
+
+ protected:
+ int indent_;
+ FILE* out_;
+
+ private:
+ std::deque<Label> labels_allocated_;
+
+ TextAssembler() = delete;
+ TextAssembler(const TextAssembler&) = delete;
+ TextAssembler(TextAssembler&&) = delete;
+ void operator=(const TextAssembler&) = delete;
+ void operator=(TextAssembler&&) = delete;
+};
+
+template <typename Arg, typename MacroAssembler>
+inline std::string ToGasArgument(const Arg& arg, MacroAssembler*) {
+ return "$" + std::to_string(arg);
+}
+
+template <typename DerivedAssemblerType>
+template <typename... Args>
+inline void TextAssembler<DerivedAssemblerType>::Instruction(const char* name,
+ Condition cond,
+ const Args&... args) {
+ char name_with_condition[8] = {};
+ CHECK_EQ(strcmp(name, "Bcc"), 0);
+
+ switch (cond) {
+ case Condition::kEqual:
+ strcat(name_with_condition, "eq");
+ break;
+ case Condition::kNotEqual:
+ strcat(name_with_condition, "ne");
+ break;
+ case Condition::kLess:
+ strcat(name_with_condition, "lt");
+ break;
+ case Condition::kGreaterEqual:
+ strcat(name_with_condition, "ge");
+ break;
+ case Condition::kBelow:
+ strcat(name_with_condition, "ltu");
+ break;
+ case Condition::kAboveEqual:
+ strcat(name_with_condition, "geu");
+ break;
+ default:
+ LOG_ALWAYS_FATAL("Unsupported condition %d", cond);
+ }
+ Instruction(name_with_condition, args...);
+}
+
+template <typename DerivedAssemblerType>
+template <typename... Args>
+inline void TextAssembler<DerivedAssemblerType>::Instruction(const char* name,
+ const Args&... args) {
+ int name_length = strlen(name);
+ fprintf(out_, "%*s\"%.*s ", indent_ + 2, "", name_length, name);
+ EmitString(ToGasArgument(args, this)...);
+ fprintf(out_, "\\n\"\n");
+}
+
+} // namespace riscv
+
+} // namespace berberis
+
+#endif // BERBERIS_INTRINSICS_COMMON_TO_RISCV_TEXT_ASSEMBLER_COMMON_H_
diff --git a/intrinsics/all_to_x86_32_or_x86_64/include/berberis/intrinsics/all_to_x86_32_or_x86_64/verifier_assembler_x86_32_and_x86_64.h b/intrinsics/all_to_x86_32_or_x86_64/include/berberis/intrinsics/all_to_x86_32_or_x86_64/verifier_assembler_x86_32_and_x86_64.h
new file mode 100644
index 0000000..fdfe576
--- /dev/null
+++ b/intrinsics/all_to_x86_32_or_x86_64/include/berberis/intrinsics/all_to_x86_32_or_x86_64/verifier_assembler_x86_32_and_x86_64.h
@@ -0,0 +1,738 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_ALL_TO_X86_32_OR_x86_64_TEXT_ASSEMBLER_COMMON_H_
+#define BERBERIS_INTRINSICS_ALL_TO_X86_32_OR_x86_64_TEXT_ASSEMBLER_COMMON_H_
+
+#include <array>
+#include <cstdint>
+#include <cstdio>
+#include <deque>
+#include <string>
+
+#include "berberis/base/checks.h"
+#include "berberis/base/config.h"
+#include "berberis/base/dependent_false.h"
+#include "berberis/intrinsics/all_to_x86_32_or_x86_64/intrinsics_bindings.h"
+
+namespace berberis {
+
+namespace constants_pool {
+
+// Note: kBerberisMacroAssemblerConstantsRelocated is the same as original,
+// unrelocated version in 32-bit world. But in 64-bit world it's copy on the first 2GiB.
+//
+// Our builder could be built as 64-bit binary thus we must not mix them.
+//
+// Note: we have CHECK_*_LAYOUT tests in macro_assembler_common_x86.cc to make sure
+// offsets produced by 64-bit builder are usable in 32-bit libberberis.so
+
+extern const int32_t kBerberisMacroAssemblerConstantsRelocated;
+
+inline int32_t GetOffset(int32_t address) {
+ return address - constants_pool::kBerberisMacroAssemblerConstantsRelocated;
+}
+
+} // namespace constants_pool
+
+namespace x86_32_and_x86_64 {
+
+template <typename DerivedAssemblerType>
+class TextAssembler {
+ public:
+ // Condition class - 16 x86 conditions.
+ enum class Condition {
+ kOverflow = 0,
+ kNoOverflow = 1,
+ kBelow = 2,
+ kAboveEqual = 3,
+ kEqual = 4,
+ kNotEqual = 5,
+ kBelowEqual = 6,
+ kAbove = 7,
+ kNegative = 8,
+ kPositiveOrZero = 9,
+ kParityEven = 10,
+ kParityOdd = 11,
+ kLess = 12,
+ kGreaterEqual = 13,
+ kLessEqual = 14,
+ kGreater = 15,
+
+ // aka...
+ kCarry = kBelow,
+ kNotCarry = kAboveEqual,
+ kZero = kEqual,
+ kNotZero = kNotEqual,
+ kSign = kNegative,
+ kNotSign = kPositiveOrZero
+ };
+
+ enum ScaleFactor {
+ kTimesOne = 0,
+ kTimesTwo = 1,
+ kTimesFour = 2,
+ kTimesEight = 3,
+ // All our target systems use 32-bit pointers.
+ kTimesPointerSize = kTimesFour
+ };
+
+ struct Label {
+ size_t id;
+ bool bound = false;
+
+ template <typename MacroAssembler>
+ friend std::string ToGasArgument(const Label& label, MacroAssembler*) {
+ return std::to_string(label.id) + (label.bound ? "b" : "f");
+ }
+ };
+
+ struct Operand;
+
+ class Register {
+ public:
+ constexpr Register(int arg_no) : arg_no_(arg_no) {}
+ int arg_no() const {
+ CHECK_NE(arg_no_, kNoRegister);
+ return arg_no_;
+ }
+
+ constexpr bool operator==(const Register& other) const { return arg_no() == other.arg_no(); }
+ constexpr bool operator!=(const Register& other) const { return arg_no() != other.arg_no(); }
+
+ static constexpr int kNoRegister = -1;
+ static constexpr int kStackPointer = -2;
+ // Used in Operand to deal with references to scratch area.
+ static constexpr int kScratchPointer = -3;
+
+ private:
+ friend struct Operand;
+
+ // Register number created during creation of assembler call.
+ // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
+ //
+ // Default value (-1) means it's not assigned yet (thus couldn't be used).
+ int arg_no_;
+ };
+
+ class X87Register {
+ public:
+ constexpr X87Register(int arg_no) : arg_no_(arg_no) {}
+ int arg_no() const {
+ CHECK_NE(arg_no_, kNoRegister);
+ return arg_no_;
+ }
+
+ constexpr bool operator==(const X87Register& other) const { return arg_no_ == other.arg_no_; }
+ constexpr bool operator!=(const X87Register& other) const { return arg_no_ != other.arg_no_; }
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const X87Register& reg, MacroAssembler*) {
+ return '%' + std::to_string(reg.arg_no());
+ }
+
+ private:
+ // Register number created during creation of assembler call.
+ // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
+ //
+ // Default value (-1) means it's not assigned yet (thus couldn't be used).
+ static constexpr int kNoRegister = -1;
+ int arg_no_;
+ };
+
+ template <int kBits>
+ class SIMDRegister {
+ public:
+ friend class SIMDRegister<384 - kBits>;
+ constexpr SIMDRegister(int arg_no) : arg_no_(arg_no) {}
+ int arg_no() const {
+ CHECK_NE(arg_no_, kNoRegister);
+ return arg_no_;
+ }
+
+ constexpr bool operator==(const SIMDRegister& other) const {
+ return arg_no() == other.arg_no();
+ }
+ constexpr bool operator!=(const SIMDRegister& other) const {
+ return arg_no() != other.arg_no();
+ }
+
+ constexpr auto To128Bit() const {
+ return std::enable_if_t<kBits != 128, SIMDRegister<128>>{arg_no_};
+ }
+ constexpr auto To256Bit() const {
+ return std::enable_if_t<kBits != 256, SIMDRegister<256>>{arg_no_};
+ }
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const SIMDRegister& reg, MacroAssembler*) {
+ if constexpr (kBits == 128) {
+ return "%x" + std::to_string(reg.arg_no());
+ } else if constexpr (kBits == 256) {
+ return "%t" + std::to_string(reg.arg_no());
+ } else if constexpr (kBits == 512) {
+ return "%g" + std::to_string(reg.arg_no());
+ } else {
+ static_assert(kDependentValueFalse<kBits>);
+ }
+ }
+
+ private:
+ // Register number created during creation of assembler call.
+ // See arg['arm_register'] in _gen_c_intrinsic_body in gen_intrinsics.py
+ //
+ // Default value (-1) means it's not assigned yet (thus couldn't be used).
+ static constexpr int kNoRegister = -1;
+ int arg_no_;
+ };
+
+ using XMMRegister = SIMDRegister<128>;
+ using YMMRegister = SIMDRegister<256>;
+
+ struct Operand {
+ Register base = Register{Register::kNoRegister};
+ Register index = Register{Register::kNoRegister};
+ ScaleFactor scale = kTimesOne;
+ int32_t disp = 0;
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const Operand& op, MacroAssembler* as) {
+ std::string result{};
+ if (op.base.arg_no_ == Register::kNoRegister and op.index.arg_no_ == Register::kNoRegister) {
+ as->need_gpr_macroassembler_constants_ = true;
+ result =
+ std::to_string(constants_pool::GetOffset(op.disp)) + " + " +
+ ToGasArgument(
+ typename DerivedAssemblerType::RegisterDefaultBit(as->gpr_macroassembler_constants),
+ as);
+ } else if (op.base.arg_no_ == Register::kScratchPointer) {
+ CHECK(op.index.arg_no_ == Register::kNoRegister);
+ // Only support two pointers to scratch area for now.
+ if (op.disp == 0) {
+ result = '%' + std::to_string(as->gpr_macroassembler_scratch.arg_no());
+ } else if (op.disp == config::kScratchAreaSlotSize) {
+ result = '%' + std::to_string(as->gpr_macroassembler_scratch2.arg_no());
+ } else {
+ FATAL("Only two scratch registers are supported for now");
+ }
+ } else {
+ if (op.base.arg_no_ != Register::kNoRegister) {
+ result = ToGasArgument(typename DerivedAssemblerType::RegisterDefaultBit(op.base), as);
+ }
+ if (op.index.arg_no_ != Register::kNoRegister) {
+ result += ',' +
+ ToGasArgument(typename DerivedAssemblerType::RegisterDefaultBit(op.index), as) +
+ ',' + std::to_string(1 << op.scale);
+ }
+ result = '(' + result + ')';
+ if (op.disp) {
+ result = std::to_string(op.disp) + result;
+ }
+ }
+ return result;
+ }
+ };
+
+ TextAssembler(int indent, FILE* out) : indent_(indent), out_(out) {}
+
+ // These start as Register::kNoRegister but can be changed if they are used as arguments to
+ // something else.
+ // If they are not coming as arguments then using them is compile-time error!
+ Register gpr_a{Register::kNoRegister};
+ Register gpr_b{Register::kNoRegister};
+ Register gpr_c{Register::kNoRegister};
+ Register gpr_d{Register::kNoRegister};
+ // Note: stack pointer is not reflected in list of arguments, intrinsics use
+ // it implicitly.
+ Register gpr_s{Register::kStackPointer};
+ // Used in Operand as pseudo-register to temporary operand.
+ Register gpr_scratch{Register::kScratchPointer};
+
+ // In x86-64 case we could refer to kBerberisMacroAssemblerConstants via %rip.
+ // In x86-32 mode, on the other hand, we need complex dance to access it via GOT.
+ // Intrinsics which use these constants receive it via additional parameter - and
+ // we need to know if it's needed or not.
+ Register gpr_macroassembler_constants{Register::kNoRegister};
+ bool need_gpr_macroassembler_constants() const { return need_gpr_macroassembler_constants_; }
+
+ Register gpr_macroassembler_scratch{Register::kNoRegister};
+ bool need_gpr_macroassembler_scratch() const { return need_gpr_macroassembler_scratch_; }
+ Register gpr_macroassembler_scratch2{Register::kNoRegister};
+
+ bool need_aesavx = false;
+ bool need_aes = false;
+ bool need_avx = false;
+ bool need_avx2 = false;
+ bool need_bmi = false;
+ bool need_bmi2 = false;
+ bool need_clmulavx = false;
+ bool need_clmul = false;
+ bool need_f16c = false;
+ bool need_fma = false;
+ bool need_fma4 = false;
+ bool need_lzcnt = false;
+ bool need_popcnt = false;
+ bool need_sse3 = false;
+ bool need_ssse3 = false;
+ bool need_sse4_1 = false;
+ bool need_sse4_2 = false;
+ bool need_vaes = false;
+ bool need_vpclmulqd = false;
+ bool has_custom_capability = false;
+
+ void Bind(Label* label) {
+ CHECK_EQ(label->bound, false);
+ fprintf(out_, "%*s\"%zd:\\n\"\n", indent_ + 2, "", label->id);
+ label->bound = true;
+ }
+
+ Label* MakeLabel() {
+ labels_allocated_.push_back({labels_allocated_.size()});
+ return &labels_allocated_.back();
+ }
+
+ template <typename... Args>
+ void Byte(Args... args) {
+ static_assert((std::is_same_v<Args, uint8_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu8, print_kwd ? print_kwd = false, ".byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ template <typename... Args>
+ void TwoByte(Args... args) {
+ static_assert((std::is_same_v<Args, uint16_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu16, print_kwd ? print_kwd = false, ".2byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ template <typename... Args>
+ void FourByte(Args... args) {
+ static_assert((std::is_same_v<Args, uint32_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu32, print_kwd ? print_kwd = false, ".4byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ template <typename... Args>
+ void EigthByte(Args... args) {
+ static_assert((std::is_same_v<Args, uint64_t> && ...));
+ bool print_kwd = true;
+ fprintf(out_, "%*s\"", indent_ + 2, "");
+ (fprintf(out_, "%s%" PRIu64, print_kwd ? print_kwd = false, ".8byte " : ", ", args), ...);
+ fprintf(out_, "\\n\"\n");
+ }
+
+ void P2Align(uint32_t m) { fprintf(out_, "%*s\".p2align %u\\n\"\n", indent_ + 2, "", m); }
+
+ // Verify CPU vendor and SSE restrictions.
+ template <typename CPUIDRestriction>
+ void CheckCPUIDRestriction() {
+ constexpr bool expect_bmi = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasBMI>;
+ constexpr bool expect_f16c = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasF16C>;
+ constexpr bool expect_fma = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasFMA>;
+ constexpr bool expect_fma4 = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasFMA4>;
+ constexpr bool expect_lzcnt = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasLZCNT>;
+ constexpr bool expect_vaes = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasVAES>;
+ constexpr bool expect_vpclmulqd =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasVPCLMULQD>;
+ constexpr bool expect_aesavx =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAESAVX> || expect_vaes;
+ constexpr bool expect_aes =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAES> || expect_aesavx;
+ constexpr bool expect_clmulavx =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasCLMULAVX> || expect_vpclmulqd;
+ constexpr bool expect_clmul =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasCLMUL> || expect_clmulavx;
+ constexpr bool expect_popcnt =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasPOPCNT>;
+ constexpr bool expect_avx = std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAVX> ||
+ expect_aesavx || expect_clmulavx || expect_f16c || expect_fma ||
+ expect_fma4;
+ constexpr bool expect_sse4_2 =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSE4_2> || expect_aes ||
+ expect_clmul || expect_avx;
+ constexpr bool expect_sse4_1 =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSE4_1> || expect_sse4_2;
+ constexpr bool expect_ssse3 =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSSE3> || expect_sse4_1;
+ constexpr bool expect_sse3 =
+ std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSE3> || expect_ssse3;
+
+ CHECK_EQ(expect_aesavx, need_aesavx);
+ CHECK_EQ(expect_aes, need_aes);
+ CHECK_EQ(expect_avx, need_avx);
+ CHECK_EQ(expect_bmi, need_bmi);
+ CHECK_EQ(expect_clmulavx, need_clmulavx);
+ CHECK_EQ(expect_clmul, need_clmul);
+ CHECK_EQ(expect_f16c, need_f16c);
+ CHECK_EQ(expect_fma, need_fma);
+ CHECK_EQ(expect_fma4, need_fma4);
+ CHECK_EQ(expect_lzcnt, need_lzcnt);
+ CHECK_EQ(expect_popcnt, need_popcnt);
+ CHECK_EQ(expect_sse3, need_sse3);
+ CHECK_EQ(expect_ssse3, need_ssse3);
+ CHECK_EQ(expect_sse4_1, need_sse4_1);
+ CHECK_EQ(expect_sse4_2, need_sse4_2);
+ CHECK_EQ(expect_vaes, need_vaes);
+ CHECK_EQ(expect_vpclmulqd, need_vpclmulqd);
+ }
+
+ // Translate CPU restrictions into string.
+ template <typename CPUIDRestriction>
+ static constexpr const char* kCPUIDRestrictionString =
+ DerivedAssemblerType::template CPUIDRestrictionToString<CPUIDRestriction>();
+
+// Instructions.
+#include "gen_text_assembler_common_x86-inl.h" // NOLINT generated file
+
+ protected:
+ template <typename CPUIDRestriction>
+ static constexpr const char* CPUIDRestrictionToString() {
+ if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::NoCPUIDRestriction>) {
+ return nullptr;
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::IsAuthenticAMD>) {
+ return "host_platform::kIsAuthenticAMD";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAES>) {
+ return "host_platform::kHasAES";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAESAVX>) {
+ return "host_platform::kHasAES && host_platform::kHasAVX";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAVX>) {
+ return "host_platform::kHasAVX";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasBMI>) {
+ return "host_platform::kHasBMI";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasF16C>) {
+ return "host_platform::kHasF16C";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasCLMUL>) {
+ return "host_platform::kHasCLMUL";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasCLMULAVX>) {
+ return "host_platform::kHasCLMUL && host_platform::kHasAVX";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasFMA>) {
+ return "host_platform::kHasFMA";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasFMA4>) {
+ return "host_platform::kHasFMA4";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasLZCNT>) {
+ return "host_platform::kHasLZCNT";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasPOPCNT>) {
+ return "host_platform::kHasPOPCNT";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSE3>) {
+ return "host_platform::kHasSSE3";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSSE3>) {
+ return "host_platform::kHasSSSE3";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSE4_1>) {
+ return "host_platform::kHasSSE4_1";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSE4_2>) {
+ return "host_platform::kHasSSE4_2";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasSSSE3>) {
+ return "host_platform::kHasSSSE3";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasVAES>) {
+ return "host_platform::kHasVAES";
+ } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasVPCLMULQD>) {
+ return "host_platform::kHasVPCLMULQD";
+ } else if constexpr (std::is_same_v<CPUIDRestriction,
+ intrinsics::bindings::HasCustomCapability>) {
+ return "host_platform::kHasCustomCapability";
+ } else {
+ static_assert(kDependentTypeFalse<CPUIDRestriction>);
+ }
+ }
+
+ bool need_gpr_macroassembler_constants_ = false;
+ bool need_gpr_macroassembler_scratch_ = false;
+
+ template <const char* kSpPrefix, char kRegisterPrefix>
+ class RegisterTemplate {
+ public:
+ explicit RegisterTemplate(Register reg) : reg_(reg) {}
+
+ template <typename MacroAssembler>
+ friend const std::string ToGasArgument(const RegisterTemplate& reg, MacroAssembler*) {
+ if (reg.reg_.arg_no() == Register::kStackPointer) {
+ return kSpPrefix;
+ } else {
+ if (kRegisterPrefix) {
+ return std::string({'%', kRegisterPrefix}) + std::to_string(reg.reg_.arg_no());
+ } else {
+ return '%' + std::to_string(reg.reg_.arg_no());
+ }
+ }
+ }
+
+ private:
+ Register reg_;
+ };
+
+ constexpr static char kSpl[] = "%%spl";
+ using Register8Bit = RegisterTemplate<kSpl, 'b'>;
+ constexpr static char kSp[] = "%%sp";
+ using Register16Bit = RegisterTemplate<kSp, 'w'>;
+ constexpr static char kEsp[] = "%%esp";
+ using Register32Bit = RegisterTemplate<kEsp, 'k'>;
+ constexpr static char kRsp[] = "%%rsp";
+ using Register64Bit = RegisterTemplate<kRsp, 'q'>;
+
+ void SetRequiredFeatureAESAVX() {
+ need_aesavx = true;
+ SetRequiredFeatureAES();
+ SetRequiredFeatureAVX();
+ }
+
+ void SetRequiredFeatureAES() {
+ need_aes = true;
+ SetRequiredFeatureSSE4_2();
+ }
+
+ void SetRequiredFeatureAVX() {
+ need_avx = true;
+ SetRequiredFeatureSSE4_2();
+ }
+
+ void SetRequiredFeatureAVX2() {
+ need_avx2 = true;
+ SetRequiredFeatureAVX();
+ }
+
+ void SetRequiredFeatureBMI() { need_bmi = true; }
+
+ void SetRequiredFeatureBMI2() { need_bmi2 = true; }
+
+ void SetRequiredFeatureCLMULAVX() {
+ need_clmulavx = true;
+ SetRequiredFeatureCLMUL();
+ SetRequiredFeatureAVX();
+ }
+
+ void SetRequiredFeatureCLMUL() {
+ need_clmul = true;
+ SetRequiredFeatureSSE4_2();
+ }
+
+ void SetRequiredFeatureF16C() {
+ need_f16c = true;
+ SetRequiredFeatureAVX();
+ }
+
+ void SetRequiredFeatureFMA() {
+ need_fma = true;
+ SetRequiredFeatureAVX();
+ }
+
+ void SetRequiredFeatureFMA4() {
+ need_fma4 = true;
+ SetRequiredFeatureAVX();
+ }
+
+ void SetRequiredFeatureLZCNT() { need_lzcnt = true; }
+
+ void SetRequiredFeaturePOPCNT() { need_popcnt = true; }
+
+ void SetRequiredFeatureSSE3() {
+ need_sse3 = true;
+ // Note: we assume that SSE2 is always available thus we don't have have_sse2 or have_sse1
+ // variables.
+ }
+
+ void SetRequiredFeatureSSSE3() {
+ need_ssse3 = true;
+ SetRequiredFeatureSSE3();
+ }
+
+ void SetRequiredFeatureSSE4_1() {
+ need_sse4_1 = true;
+ SetRequiredFeatureSSSE3();
+ }
+
+ void SetRequiredFeatureSSE4_2() {
+ need_sse4_2 = true;
+ SetRequiredFeatureSSE4_1();
+ }
+
+ void SetRequiredFeatureVAES() {
+ need_vaes = true;
+ SetRequiredFeatureAESAVX();
+ }
+
+ void SetRequiredFeatureVPCLMULQD() {
+ need_vpclmulqd = true;
+ SetRequiredFeatureCLMULAVX();
+ }
+
+ void SetHasCustomCapability() { has_custom_capability = true; }
+
+ template <typename... Args>
+ void Instruction(const char* name, Condition cond, const Args&... args);
+
+ template <typename... Args>
+ void Instruction(const char* name, const Args&... args);
+
+ void EmitString() {}
+
+ void EmitString(const std::string& s) { fprintf(out_, "%s", s.c_str()); }
+
+ template <typename... Args>
+ void EmitString(const std::string& s, const Args&... args) {
+ EmitString(args...);
+ fprintf(out_, ", %s", s.c_str());
+ }
+
+ protected:
+ int indent_;
+ FILE* out_;
+
+ private:
+ std::deque<Label> labels_allocated_;
+
+ TextAssembler() = delete;
+ TextAssembler(const TextAssembler&) = delete;
+ TextAssembler(TextAssembler&&) = delete;
+ void operator=(const TextAssembler&) = delete;
+ void operator=(TextAssembler&&) = delete;
+};
+
+template <typename Arg, typename MacroAssembler>
+inline std::string ToGasArgument(const Arg& arg, MacroAssembler*) {
+ return "$" + std::to_string(arg);
+}
+
+template <typename DerivedAssemblerType>
+template <typename... Args>
+inline void TextAssembler<DerivedAssemblerType>::Instruction(const char* name,
+ Condition cond,
+ const Args&... args) {
+ char name_with_condition[8] = {};
+ if (strcmp(name, "Cmovw") == 0 || strcmp(name, "Cmovl") == 0 || strcmp(name, "Cmovq") == 0) {
+ strcpy(name_with_condition, "Cmov");
+ } else if (strcmp(name, "Jcc") == 0) {
+ strcpy(name_with_condition, "J");
+ } else {
+ CHECK(strcmp(name, "Setcc") == 0);
+ strcpy(name_with_condition, "Set");
+ }
+ switch (cond) {
+ case Condition::kOverflow:
+ strcat(name_with_condition, "o");
+ break;
+ case Condition::kNoOverflow:
+ strcat(name_with_condition, "no");
+ break;
+ case Condition::kBelow:
+ strcat(name_with_condition, "b");
+ break;
+ case Condition::kAboveEqual:
+ strcat(name_with_condition, "ae");
+ break;
+ case Condition::kEqual:
+ strcat(name_with_condition, "e");
+ break;
+ case Condition::kNotEqual:
+ strcat(name_with_condition, "ne");
+ break;
+ case Condition::kBelowEqual:
+ strcat(name_with_condition, "be");
+ break;
+ case Condition::kAbove:
+ strcat(name_with_condition, "a");
+ break;
+ case Condition::kNegative:
+ strcat(name_with_condition, "s");
+ break;
+ case Condition::kPositiveOrZero:
+ strcat(name_with_condition, "ns");
+ break;
+ case Condition::kParityEven:
+ strcat(name_with_condition, "p");
+ break;
+ case Condition::kParityOdd:
+ strcat(name_with_condition, "np");
+ break;
+ case Condition::kLess:
+ strcat(name_with_condition, "l");
+ break;
+ case Condition::kGreaterEqual:
+ strcat(name_with_condition, "ge");
+ break;
+ case Condition::kLessEqual:
+ strcat(name_with_condition, "le");
+ break;
+ case Condition::kGreater:
+ strcat(name_with_condition, "g");
+ break;
+ }
+ Instruction(name_with_condition, args...);
+}
+
+template <typename DerivedAssemblerType>
+template <typename... Args>
+inline void TextAssembler<DerivedAssemblerType>::Instruction(const char* name,
+ const Args&... args) {
+ for (auto it : std::array<std::tuple<const char*, const char*>, 22>{
+ {// Note: SSE doesn't include simple register-to-register move instruction.
+ // You are supposed to use one of half-dozen variants depending on what you
+ // are doing.
+ //
+ // Pseudoinstructions with embedded "lock" prefix.
+ {"Lock Xaddb", "Lock; Xaddb"},
+ {"Lock Xaddw", "Lock; Xaddw"},
+ {"Lock Xaddl", "Lock; Xaddl"},
+ {"Lock Xaddq", "Lock; Xaddq"},
+ {"Lock CmpXchg8b", "Lock; CmpXchg8b"},
+ {"Lock CmpXchg16b", "Lock; CmpXchg16b"},
+ {"Lock CmpXchgb", "Lock; CmpXchgb"},
+ {"Lock CmpXchgl", "Lock; CmpXchgl"},
+ {"Lock CmpXchgq", "Lock; CmpXchgq"},
+ {"Lock CmpXchgw", "Lock; CmpXchgw"},
+ // Our assembler has Pmov instruction which is supposed to pick the best
+ // option - but currently we just map Pmov to Movaps.
+ {"Pmov", "Movaps"},
+ // These instructions use different names in our assembler than in GNU AS.
+ {"Movdq", "Movaps"},
+ {"Movsxbl", "Movsbl"},
+ {"Movsxbq", "Movsbq"},
+ {"Movsxwl", "Movswl"},
+ {"Movsxwq", "Movswq"},
+ {"Movsxlq", "Movslq"},
+ {"Movzxbl", "Movzbl"},
+ {"Movzxbq", "Movzbq"},
+ {"Movzxwl", "Movzwl"},
+ {"Movzxwq", "Movzwq"},
+ {"Movzxlq", "Movzlq"}}}) {
+ if (strcmp(name, std::get<0>(it)) == 0) {
+ name = std::get<1>(it);
+ break;
+ }
+ }
+
+ int name_length = strlen(name);
+ auto cl_register = "";
+ if (name_length > 4 && strcmp(name + (name_length - 4), "ByCl") == 0) {
+ name_length -= 4;
+ cl_register = " %%cl,";
+ }
+
+ fprintf(out_, "%*s\"%.*s%s ", indent_ + 2, "", name_length, name, cl_register);
+ EmitString(ToGasArgument(args, this)...);
+ fprintf(out_, "\\n\"\n");
+}
+
+} // namespace x86_32_and_x86_64
+
+} // namespace berberis
+
+#endif // BERBERIS_INTRINSICS_ALL_TO_X86_32_OR_x86_64_TEXT_ASSEMBLER_COMMON_H_