blob: d6f0d59bda12dd253f6a9f1d1356cf65a89ba94f [file] [log] [blame]
* Copyright (C) 2023 The Android Open Source Project
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include "code_generator_riscv64.h"
#include "android-base/logging.h"
#include "android-base/macros.h"
#include "arch/riscv64/jni_frame_riscv64.h"
#include "arch/riscv64/registers_riscv64.h"
#include "base/arena_containers.h"
#include "base/macros.h"
#include "class_root-inl.h"
#include "code_generator_utils.h"
#include "dwarf/register.h"
#include "gc/heap.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics_list.h"
#include "intrinsics_riscv64.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "mirror/class-inl.h"
#include "optimizing/nodes.h"
#include "optimizing/profiling_info_builder.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
#include "stack_map_stream.h"
#include "trace.h"
#include "utils/label.h"
#include "utils/riscv64/assembler_riscv64.h"
#include "utils/stack_checks.h"
namespace art HIDDEN {
namespace riscv64 {
// Placeholder values embedded in instructions, patched at link time.
// NOTE(review): presumably "High" and "Low" are the two immediate fields of an
// instruction pair (upper part and 12-bit lower part) -- confirm at the use sites.
constexpr uint32_t kLinkTimeOffsetPlaceholderHigh = 0x12345;
constexpr uint32_t kLinkTimeOffsetPlaceholderLow = 0x678;
// Number of packed switch entries at which code generation changes strategy.
// Compare-and-jump packed switch generates approx. 3 + 1.5 * N 32-bit
// instructions for N cases.
// Table-based packed switch generates approx. 10 32-bit instructions
// and N 32-bit data words for N cases.
// We switch to the table-based method starting with 6 entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6;
// Core (integer) registers treated as callee-saved: S0, S2..S11 and RA.
// NOTE(review): the closing `};` of this initializer is not visible in this extract.
static constexpr XRegister kCoreCalleeSaves[] = {
// S1(TR) is excluded as the ART thread register.
S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA
// Floating-point registers treated as callee-saved: FS0..FS11.
// NOTE(review): the closing `};` of this initializer is not visible in this extract.
static constexpr FRegister kFpuCalleeSaves[] = {
FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11
// Expands to the 32-bit offset of quick entrypoint `x`, obtained from
// QUICK_ENTRYPOINT_OFFSET for the RISC-V 64 pointer size.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, x).Int32Value()
// Chooses an input location for `instruction`: a constant location when it is a
// zero bit pattern, otherwise a request for a core register.
Location RegisterOrZeroBitPatternLocation(HInstruction* instruction) {
  if (!IsZeroBitPattern(instruction)) {
    return Location::RequiresRegister();
  }
  return Location::ConstantLocation(instruction);
}
// Chooses an input location for `instruction`: a constant location when it is a
// zero bit pattern, otherwise a request for a floating-point register.
Location FpuRegisterOrZeroBitPatternLocation(HInstruction* instruction) {
  if (!IsZeroBitPattern(instruction)) {
    return Location::RequiresFpuRegister();
  }
  return Location::ConstantLocation(instruction);
}
// Returns the core register to read an input from: `Zero` for a constant
// location, otherwise the register held by `location`.
XRegister InputXRegisterOrZero(Location location) {
  return location.IsConstant() ? Zero : location.AsRegister<XRegister>();
}
// Chooses the location of a value about to be stored:
//  - a constant location for a zero bit pattern,
//  - a floating-point register for floating-point types,
//  - a core register for everything else.
Location ValueLocationForStore(HInstruction* value) {
  if (IsZeroBitPattern(value)) {
    return Location::ConstantLocation(value);
  }
  const bool is_fp_value = DataType::IsFloatingPointType(value->GetType());
  return is_fp_value ? Location::RequiresFpuRegister() : Location::RequiresRegister();
}
// Maps a return type to the location that holds the result:
//  - A0 for all integral types and references,
//  - FA0 for floating-point types,
//  - no location for void.
// NOTE(review): the end of the switch and whatever follows it are not visible in this extract.
Location Riscv64ReturnLocation(DataType::Type return_type) {
switch (return_type) {
// Integral and reference results share the core return register.
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kUint32:
case DataType::Type::kInt32:
case DataType::Type::kReference:
case DataType::Type::kUint64:
case DataType::Type::kInt64:
return Location::RegisterLocation(A0);
// Floating-point results use the FP return register.
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
return Location::FpuRegisterLocation(FA0);
case DataType::Type::kVoid:
return Location::NoLocation();
// Builds and returns a caller-save register set, starting from an empty set.
// NOTE(review): in this extract nothing is added to `caller_saves` and
// `calling_convention` is unused; the statements that populate the set appear
// to be missing -- confirm against the upstream file.
static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
InvokeRuntimeCallingConvention calling_convention;
RegisterSet caller_saves = RegisterSet::Empty();
return caller_saves;
template <ClassStatus kStatus>
static constexpr int64_t ShiftedSignExtendedClassStatusValue() {
// This is used only for status values that have the highest bit set.
static_assert(CLZ(enum_cast<uint32_t>(kStatus)) == kClassStatusLsbPosition);
constexpr uint32_t kShiftedStatusValue = enum_cast<uint32_t>(kStatus) << kClassStatusLsbPosition;
static_assert(kShiftedStatusValue >= 0x80000000u);
return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32);
// Splits a 64-bit address used by JIT into a 4KiB-aligned base address and a signed
// 12-bit offset such that `base + offset == address`. The base is the *nearest*
// 4KiB-aligned address because the offset has range [-0x800, 0x800). It is usually
// cheaper to materialize the aligned address than the full address.
std::pair<uint64_t, int32_t> SplitJitAddress(uint64_t address) {
  const uint64_t low12 = address & UINT64_C(0xfff);
  // 0x1000 when bit 11 is set (the low part would not fit as a non-negative imm12), else 0.
  const uint64_t round_up = (address & UINT64_C(0x800)) << 1;
  const uint64_t aligned_base = (address & ~UINT64_C(0xfff)) + round_up;
  const int32_t offset =
      dchecked_integral_cast<int32_t>(low12) - dchecked_integral_cast<int32_t>(round_up);
  return {aligned_base, offset};
}
// Returns the thread-relative offset of the read barrier mark entrypoint that
// corresponds to the register held by `ref`.
int32_t ReadBarrierMarkEntrypointOffset(Location ref) {
  const int marked_reg = ref.reg();
  DCHECK(T0 <= marked_reg && marked_reg <= T6 && marked_reg != TR) << marked_reg;
  // Note: Entrypoints for registers X30 (T5) and X31 (T6) are stored in entries
  // for X0 (Zero) and X1 (RA) because these are not valid registers for marking
  // and we currently have slots only up to register 29.
  int slot = marked_reg;
  if (marked_reg >= 30) {
    slot -= 30;
  }
  return Thread::ReadBarrierMarkEntryPointsOffset<kRiscv64PointerSize>(slot);
}
// Returns the result location for a runtime call; delegates to `Riscv64ReturnLocation()`.
Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
return Riscv64ReturnLocation(return_type);
// Returns the result location for a managed call; delegates to `Riscv64ReturnLocation()`.
Location InvokeDexCallingConventionVisitorRISCV64::GetReturnLocation(DataType::Type type) const {
return Riscv64ReturnLocation(type);
// Returns the location of the method argument: the register `kArtMethodRegister`.
Location InvokeDexCallingConventionVisitorRISCV64::GetMethodLocation() const {
return Location::RegisterLocation(kArtMethodRegister);
// Returns the location of the next argument of type `type` and advances the
// visitor state (`gp_index_`, `stack_index_`; `float_index_` is read here too).
// Arguments go to FP registers (floating-point types) or core registers (other
// types) while those are available, and to the stack otherwise.
// NOTE(review): the right-hand sides of two assignments below are missing from
// this extract -- confirm against the upstream file.
Location InvokeDexCallingConventionVisitorRISCV64::GetNextLocation(DataType::Type type) {
Location next_location;
// `void` is never a valid parameter type.
if (type == DataType::Type::kVoid) {
LOG(FATAL) << "Unexpected parameter type " << type;
// Note: Unlike the RISC-V C/C++ calling convention, managed ABI does not use
// GPRs to pass FP args when we run out of FPRs.
if (DataType::IsFloatingPointType(type) &&
float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
// NOTE(review): the assigned expression is not visible in this extract.
next_location =
} else if (!DataType::IsFloatingPointType(type) &&
(gp_index_ < calling_convention.GetNumberOfRegisters())) {
next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++));
} else {
// Out of registers of the required kind: use the stack slot(s) at `stack_index_`.
size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
// NOTE(review): the non-64-bit alternative of this conditional is not visible in this extract.
next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) :
// Space on the stack is reserved for all arguments.
stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
return next_location;
// Returns the location of the next argument of type `type` for a critical native call.
// References are not accepted. Arguments without an available register get a stack slot
// at `stack_offset_`. When `for_register_allocation_` is set, such stack arguments are
// reported as `Location::Any()` instead.
// NOTE(review): no updates of `fpr_index_`/`gpr_index_` are visible in this extract, and
// block terminators are missing, so the exact nesting below cannot be confirmed from here.
Location CriticalNativeCallingConventionVisitorRiscv64::GetNextLocation(DataType::Type type) {
DCHECK_NE(type, DataType::Type::kReference);
// Stays invalid until a register is found; an invalid location selects the stack path below.
Location location = Location::NoLocation();
if (DataType::IsFloatingPointType(type)) {
if (fpr_index_ < kParameterFpuRegistersLength) {
location = Location::FpuRegisterLocation(kParameterFpuRegisters[fpr_index_]);
} else {
// Native ABI allows passing excessive FP args in GPRs. This is facilitated by
// inserting fake conversion intrinsic calls (`Double.doubleToRawLongBits()`
// or `Float.floatToRawIntBits()`) by `CriticalNativeAbiFixupRiscv64`.
// Remaining FP args shall be passed on the stack.
CHECK_EQ(gpr_index_, kRuntimeParameterCoreRegistersLength);
} else {
// Native ABI uses the same core registers as a runtime call.
if (gpr_index_ < kRuntimeParameterCoreRegistersLength) {
location = Location::RegisterLocation(kRuntimeParameterCoreRegisters[gpr_index_]);
if (location.IsInvalid()) {
// Only a `float` gets a single slot. Integral args need to be sign-extended to 64 bits.
if (type == DataType::Type::kFloat32) {
location = Location::StackSlot(stack_offset_);
} else {
location = Location::DoubleStackSlot(stack_offset_);
// Advance to the next stack argument.
stack_offset_ += kFramePointerSize;
if (for_register_allocation_) {
location = Location::Any();
return location;
// Returns the result location for a critical native call.
Location CriticalNativeCallingConventionVisitorRiscv64::GetReturnLocation(
    DataType::Type type) const {
  // Native ABI and managed ABI return the result the same way, so the managed
  // calling convention visitor is reused. No result conversion is needed, see
  // comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`.
  return InvokeDexCallingConventionVisitorRISCV64().GetReturnLocation(type);
}
// Returns the location of the method argument for a critical native call.
Location CriticalNativeCallingConventionVisitorRiscv64::GetMethodLocation() const {
// Pass the method in the hidden argument T0.
return Location::RegisterLocation(T0);
// Assembler shorthand for code that has a `CodeGenerator* codegen` in scope:
// `__ Foo(...)` expands to a call of `Foo` on the RISC-V 64 code generator's assembler.
#define __ down_cast<CodeGeneratorRISCV64*>(codegen)->GetAssembler()-> // NOLINT
// Creates the common invoke location summary for `instruction`, using the managed
// (dex) calling convention visitor to place arguments.
void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) {
InvokeDexCallingConventionVisitorRISCV64 calling_convention_visitor;
CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor);
// Slow path that writes `ProfilingInfo::GetOptimizeThreshold()` as a halfword to
// `base_ + imm12_` and then calls an entrypoint loaded from the thread register (TR).
// NOTE(review): several lines are missing from this extract (access specifiers, the
// `base_` initializer, the value of `entrypoint_offset`, block terminators) -- confirm
// against the upstream file.
class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 {
// `suspend_check` may be null; `EmitNativeCode()` checks `instruction_` for null.
CompileOptimizedSlowPathRISCV64(HSuspendCheck* suspend_check, XRegister base, int32_t imm12)
: SlowPathCodeRISCV64(suspend_check),
imm12_(imm12) {}
void EmitNativeCode(CodeGenerator* codegen) override {
// NOTE(review): the initializer of `entrypoint_offset` is not visible in this extract.
uint32_t entrypoint_offset =
__ Bind(GetEntryLabel());
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
riscv64::ScratchRegisterScope srs(riscv64_codegen->GetAssembler());
XRegister counter = srs.AllocateXRegister();
// Store the optimize threshold (16-bit store) at `base_ + imm12_`.
__ LoadConst32(counter, ProfilingInfo::GetOptimizeThreshold());
__ Sh(counter, base_, imm12_);
if (instruction_ != nullptr) {
// Only saves live vector regs for SIMD.
SaveLiveRegisters(codegen, instruction_->GetLocations());
// Load the entrypoint from the thread register and call it through RA.
__ Loadd(RA, TR, entrypoint_offset);
// Note: we don't record the call here (and therefore don't generate a stack
// map), as the entrypoint should never be suspended.
__ Jalr(RA);
if (instruction_ != nullptr) {
// Only restores live vector regs for SIMD.
RestoreLiveRegisters(codegen, instruction_->GetLocations());
__ J(GetExitLabel());
const char* GetDescription() const override { return "CompileOptimizedSlowPath"; }
// Base register and 12-bit offset of the halfword written by `EmitNativeCode()`.
XRegister base_;
const int32_t imm12_;
// Slow path for a suspend check: calls the `kQuickTestSuspend` runtime entrypoint and
// then continues either at `successor_` (if set) or at the return label.
class SuspendCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
SuspendCheckSlowPathRISCV64(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCodeRISCV64(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD.
riscv64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD.
// Resume after the check: at the return label, or at the explicit successor block.
if (successor_ == nullptr) {
__ J(GetReturnLabel());
} else {
__ J(riscv64_codegen->GetLabelOf(successor_));
// Label to resume at after the slow path; only valid when there is no explicit successor.
Riscv64Label* GetReturnLabel() {
DCHECK(successor_ == nullptr);
return &return_label_;
const char* GetDescription() const override { return "SuspendCheckSlowPathRISCV64"; }
HBasicBlock* GetSuccessor() const { return successor_; }
// If not null, the block to branch to after the suspend check.
HBasicBlock* const successor_;
// If `successor_` is null, the label to branch to after the suspend check.
Riscv64Label return_label_;
// Fatal slow path for a null check, using the `kQuickThrowNullPointer` entrypoint.
// NOTE(review): the start of the call that takes the `kQuickThrowNullPointer` arguments
// is missing from this extract -- confirm against the upstream file.
class NullCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit NullCheckSlowPathRISCV64(HNullCheck* instr) : SlowPathCodeRISCV64(instr) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
// NOTE(review): argument list of a call whose first line is not visible here.
kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
// Reports this slow path as fatal; no jump back to the exit label is emitted above.
bool IsFatal() const override { return true; }
const char* GetDescription() const override { return "NullCheckSlowPathRISCV64"; }
// Fatal slow path for a bounds check: calls `kQuickThrowStringBounds` for
// `String.charAt()` checks, or another entrypoint otherwise.
// NOTE(review): the argument moves and the second alternative of the entrypoint
// selection are missing from this extract; the type check below suggests the
// alternative is `kQuickThrowArrayBounds` -- confirm against the upstream file.
class BoundsCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit BoundsCheckSlowPathRISCV64(HBoundsCheck* instruction)
: SlowPathCodeRISCV64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
InvokeRuntimeCallingConvention calling_convention;
// Select the throwing entrypoint based on the kind of bounds check.
QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ?
kQuickThrowStringBounds :
riscv64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
// Both entrypoints take two `int32_t` arguments and return nothing.
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
// Reports this slow path as fatal; no jump back to the exit label is emitted above.
bool IsFatal() const override { return true; }
const char* GetDescription() const override { return "BoundsCheckSlowPathRISCV64"; }
// Slow path for `HLoadClass` / `HClinitCheck`: resolves the type and/or initializes its
// static storage through runtime entrypoints, then moves the result to the output.
// NOTE(review): several call heads, the tail of a DCHECK and block terminators are
// missing from this extract -- confirm against the upstream file.
class LoadClassSlowPathRISCV64 : public SlowPathCodeRISCV64 {
// `cls` is the class being loaded; `at` is the instruction the slow path is attached to
// (the load itself or a clinit check).
LoadClassSlowPathRISCV64(HLoadClass* cls, HInstruction* at) : SlowPathCodeRISCV64(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Location out = locations->Out();
const uint32_t dex_pc = instruction_->GetDexPc();
// Decide which of the two runtime steps this slow path has to perform.
bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
if (must_resolve_type) {
// NOTE(review): the last operand of this DCHECK is not visible in this extract.
DCHECK(IsSameDexFile(cls_->GetDexFile(), riscv64_codegen->GetGraph()->GetDexFile()) ||
riscv64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
// Pass the type index as the first runtime argument.
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
if (cls_->NeedsAccessCheck()) {
CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
// NOTE(review): argument list of a call whose first line is not visible here.
kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
} else {
CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
riscv64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
// If we also must_do_clinit, the resolved type is now in the correct register.
} else {
// No resolution needed: the class comes from the output or from input 0.
Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
// NOTE(review): argument list of a call whose first line is not visible here.
Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source, cls_->GetType());
if (must_do_clinit) {
riscv64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
// Move the class to the desired location.
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
DataType::Type type = DataType::Type::kReference;
DCHECK_EQ(type, instruction_->GetType());
riscv64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
RestoreLiveRegisters(codegen, locations);
__ J(GetExitLabel());
const char* GetDescription() const override { return "LoadClassSlowPathRISCV64"; }
// The class this slow path will load.
HLoadClass* const cls_;
// Slow path for `HDeoptimize`: saves live registers and calls the `kQuickDeoptimize`
// runtime entrypoint, which takes a `DeoptimizationKind` argument.
// NOTE(review): the second operand of the `LoadConst32` call is missing from this
// extract -- confirm against the upstream file.
class DeoptimizationSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit DeoptimizationSlowPathRISCV64(HDeoptimize* instruction)
: SlowPathCodeRISCV64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
// Load a 32-bit constant into the first runtime argument register.
__ LoadConst32(calling_convention.GetRegisterAt(0),
riscv64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
const char* GetDescription() const override { return "DeoptimizationSlowPathRISCV64"; }
// Slow path generating a read barrier for a GC root.
// The result of the runtime call is moved into `out_`.
// NOTE(review): the constructor body and the statements that pass `root_` and invoke
// `kQuickReadBarrierForRootSlow` are missing from this extract -- confirm against the
// upstream file.
class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 {
ReadBarrierForRootSlowPathRISCV64(HInstruction* instruction, Location out, Location root)
: SlowPathCodeRISCV64(instruction), out_(out), root_(root) {
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
XRegister reg_out = out_.AsRegister<XRegister>();
// Only class loads, string loads and intrinsified invokes are expected here.
DCHECK(instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
(instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for GC root slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
// Move the returned reference from the runtime return location to `out_`.
riscv64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
RestoreLiveRegisters(codegen, locations);
__ J(GetExitLabel());
const char* GetDescription() const override { return "ReadBarrierForRootSlowPathRISCV64"; }
// Destination of the reference produced by the slow path.
const Location out_;
// Location supplied as the root at construction.
const Location root_;
// Slow path for method entry/exit hooks: calls `kQuickMethodEntryHook` for an entry
// hook instruction and `kQuickMethodExitHook` otherwise.
class MethodEntryExitHooksSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit MethodEntryExitHooksSlowPathRISCV64(HInstruction* instruction)
: SlowPathCodeRISCV64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
QuickEntrypointEnum entry_point =
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
if (instruction_->IsMethodExitHook()) {
// The exit hook additionally receives the frame size in A4.
__ Li(A4, riscv64_codegen->GetFrameSize());
riscv64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ J(GetExitLabel());
// NOTE(review): unlike the other slow paths in this file, the description string has
// no "64" suffix -- confirm whether that is intentional.
const char* GetDescription() const override {
return "MethodEntryExitHooksSlowPathRISCV";
// Slow path for an array store: calls the `kQuickAputObject` runtime entrypoint
// (array, index, value) and returns to the exit label.
// NOTE(review): the statements that add moves to `parallel_move` and emit them are
// missing from this extract -- confirm against the upstream file.
class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
// Parallel move object, allocated from the graph's allocator.
HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
riscv64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ J(GetExitLabel());
const char* GetDescription() const override { return "ArraySetSlowPathRISCV64"; }
// Slow path shared by `instanceof` and check-cast: calls `kQuickInstanceofNonTrivial`
// (result moved to the output) or `kQuickCheckInstanceOf` respectively. When `is_fatal`
// is true the slow path does not return to the exit label.
// NOTE(review): the head of a DCHECK, the argument moves and block terminators are
// missing from this extract -- confirm against the upstream file.
class TypeCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit TypeCheckSlowPathRISCV64(HInstruction* instruction, bool is_fatal)
: SlowPathCodeRISCV64(instruction), is_fatal_(is_fatal) {}
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
uint32_t dex_pc = instruction_->GetDexPc();
// NOTE(review): tail of a check whose first line is not visible here.
|| !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
// Registers are saved if the slow path returns or the exception can be caught locally.
if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
SaveLiveRegisters(codegen, locations);
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
InvokeRuntimeCallingConvention calling_convention;
if (instruction_->IsInstanceOf()) {
riscv64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
// Move the entrypoint's result to the instruction's output location.
DataType::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
riscv64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
} else {
riscv64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
// Only a non-fatal slow path resumes the fast path.
if (!is_fatal_) {
RestoreLiveRegisters(codegen, locations);
__ J(GetExitLabel());
const char* GetDescription() const override { return "TypeCheckSlowPathRISCV64"; }
bool IsFatal() const override { return is_fatal_; }
// Whether this slow path is fatal (set at construction, reported by `IsFatal()`).
const bool is_fatal_;
// Fatal slow path for a division-by-zero check, using the `kQuickThrowDivZero` entrypoint.
// NOTE(review): the start of the call that takes the `kQuickThrowDivZero` arguments is
// missing from this extract -- confirm against the upstream file.
class DivZeroCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit DivZeroCheckSlowPathRISCV64(HDivZeroCheck* instruction)
: SlowPathCodeRISCV64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
__ Bind(GetEntryLabel());
// NOTE(review): argument list of a call whose first line is not visible here.
kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
// Reports this slow path as fatal; no jump back to the exit label is emitted above.
bool IsFatal() const override { return true; }
const char* GetDescription() const override { return "DivZeroCheckSlowPathRISCV64"; }
// Slow path marking the reference held in `ref_` by calling an already loaded,
// register-specific read barrier mark entrypoint held in `entrypoint_`.
// NOTE(review): the constructor body and block terminators are missing from this extract.
class ReadBarrierMarkSlowPathRISCV64 : public SlowPathCodeRISCV64 {
ReadBarrierMarkSlowPathRISCV64(HInstruction* instruction, Location ref, Location entrypoint)
: SlowPathCodeRISCV64(instruction), ref_(ref), entrypoint_(entrypoint) {
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathRISCV64"; }
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
XRegister ref_reg = ref_.AsRegister<XRegister>();
// The marked register must not be one of the live registers of the instruction.
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
(instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
DCHECK(ref_reg >= T0 && ref_reg != TR);
// "Compact" slow path, saving two moves.
// Instead of using the standard runtime calling convention (input
// and output both in A0):
// A0 <- ref
// A0 <- ReadBarrierMark(A0)
// ref <- A0
// we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
// rX <- ReadBarrierMarkRegX(rX)
riscv64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
DCHECK_NE(entrypoint_.AsRegister<XRegister>(), TMP); // A taken branch can clobber `TMP`.
__ Jalr(entrypoint_.AsRegister<XRegister>()); // Clobbers `RA` (used as the `entrypoint_`).
__ J(GetExitLabel());
// The location (register) of the marked object reference.
const Location ref_;
// The location of the already loaded entrypoint.
const Location entrypoint_;
// Slow path for a `kBssEntry` `HLoadString`: passes the string index to the
// `kQuickResolveString` entrypoint and moves the returned reference to the output.
// NOTE(review): the first lines of the `InvokeRuntime` and `MoveLocation` calls are
// missing from this extract -- confirm against the upstream file.
class LoadStringSlowPathRISCV64 : public SlowPathCodeRISCV64 {
explicit LoadStringSlowPathRISCV64(HLoadString* instruction)
: SlowPathCodeRISCV64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
InvokeRuntimeCallingConvention calling_convention;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
// Pass the string index as the first runtime argument.
__ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
// NOTE(review): argument list of a call whose first line is not visible here.
kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
DataType::Type type = DataType::Type::kReference;
DCHECK_EQ(type, instruction_->GetType());
// NOTE(review): argument list of a call whose first line is not visible here.
locations->Out(), calling_convention.GetReturnLocation(type), type);
RestoreLiveRegisters(codegen, locations);
__ J(GetExitLabel());
const char* GetDescription() const override { return "LoadStringSlowPathRISCV64"; }
// Redefine the assembler shorthand for member functions that have `GetAssembler()`
// in scope: `__ Foo(...)` now expands to a call of `Foo` on that assembler.
#undef __
#define __ down_cast<Riscv64Assembler*>(GetAssembler())-> // NOLINT
// Emits a two-operand floating-point instruction: `opS` for `kFloat32`, otherwise
// `opD` (the type is then expected to be `kFloat64`). `Reg` is the type of the
// destination register.
template <typename Reg,
          void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
          void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
inline void InstructionCodeGeneratorRISCV64::FpBinOp(
    Reg rd, FRegister rs1, FRegister rs2, DataType::Type type) {
  Riscv64Assembler* const masm = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler();
  if (type != DataType::Type::kFloat32) {
    DCHECK_EQ(type, DataType::Type::kFloat64);
    (masm->*opD)(rd, rs1, rs2);
    return;
  }
  (masm->*opS)(rd, rs1, rs2);
}
// Emits `FAddS` for `kFloat32` or `FAddD` for `kFloat64`, via `FpBinOp`.
void InstructionCodeGeneratorRISCV64::FAdd(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type);
// Emits `FSubS` for `kFloat32` or `FSubD` for `kFloat64`, via `FpBinOp`.
inline void InstructionCodeGeneratorRISCV64::FSub(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FSubS, &Riscv64Assembler::FSubD>(rd, rs1, rs2, type);
// Emits `FDivS` for `kFloat32` or `FDivD` for `kFloat64`, via `FpBinOp`.
inline void InstructionCodeGeneratorRISCV64::FDiv(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type);
// Emits `FMulS` for `kFloat32` or `FMulD` for `kFloat64`, via `FpBinOp`.
inline void InstructionCodeGeneratorRISCV64::FMul(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type);
// Emits `FMinS` for `kFloat32` or `FMinD` for `kFloat64`, via `FpBinOp`.
inline void InstructionCodeGeneratorRISCV64::FMin(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type);
// Emits `FMaxS` for `kFloat32` or `FMaxD` for `kFloat64`, via `FpBinOp`.
inline void InstructionCodeGeneratorRISCV64::FMax(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type);
// Emits `FEqS` for `kFloat32` or `FEqD` for `kFloat64`, via `FpBinOp`; the destination
// `rd` is a core register.
inline void InstructionCodeGeneratorRISCV64::FEq(
XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type);
// Emits `FLtS` for `kFloat32` or `FLtD` for `kFloat64`, via `FpBinOp`; the destination
// `rd` is a core register.
inline void InstructionCodeGeneratorRISCV64::FLt(
XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<XRegister, &Riscv64Assembler::FLtS, &Riscv64Assembler::FLtD>(rd, rs1, rs2, type);
// Emits `FLeS` for `kFloat32` or `FLeD` for `kFloat64`, via `FpBinOp`; the destination
// `rd` is a core register.
inline void InstructionCodeGeneratorRISCV64::FLe(
XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type);
// Emits a one-operand floating-point instruction: `opS` for `kFloat32`, otherwise
// `opD` (the type is then expected to be `kFloat64`). `Reg` is the type of the
// destination register.
template <typename Reg,
          void (Riscv64Assembler::*opS)(Reg, FRegister),
          void (Riscv64Assembler::*opD)(Reg, FRegister)>
inline void InstructionCodeGeneratorRISCV64::FpUnOp(
    Reg rd, FRegister rs1, DataType::Type type) {
  Riscv64Assembler* const masm = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler();
  if (type != DataType::Type::kFloat32) {
    DCHECK_EQ(type, DataType::Type::kFloat64);
    (masm->*opD)(rd, rs1);
    return;
  }
  (masm->*opS)(rd, rs1);
}
// Emits `FAbsS` for `kFloat32` or `FAbsD` for `kFloat64`, via `FpUnOp`.
inline void InstructionCodeGeneratorRISCV64::FAbs(
FRegister rd, FRegister rs1, DataType::Type type) {
FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type);
// Emits `FNegS` for `kFloat32` or `FNegD` for `kFloat64`, via `FpUnOp`.
inline void InstructionCodeGeneratorRISCV64::FNeg(
FRegister rd, FRegister rs1, DataType::Type type) {
FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type);
// Emits `FMvS` for `kFloat32` or `FMvD` for `kFloat64`, via `FpUnOp`.
inline void InstructionCodeGeneratorRISCV64::FMv(
FRegister rd, FRegister rs1, DataType::Type type) {
FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type);
// Emits `FMvXW` for `kFloat32` or `FMvXD` for `kFloat64`, via `FpUnOp`; the destination
// `rd` is a core register and the source `rs1` a floating-point register.
inline void InstructionCodeGeneratorRISCV64::FMvX(
XRegister rd, FRegister rs1, DataType::Type type) {
FpUnOp<XRegister, &Riscv64Assembler::FMvXW, &Riscv64Assembler::FMvXD>(rd, rs1, type);
// Emits `FClassS` for `kFloat32` or `FClassD` for `kFloat64`, via `FpUnOp`; the
// destination `rd` is a core register.
void InstructionCodeGeneratorRISCV64::FClass(
XRegister rd, FRegister rs1, DataType::Type type) {
FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type);
// Emits a load of a value of `type` from `rs1 + offset` into `out`, choosing the
// assembler load by type. `out` is a core register for integral/reference types and a
// floating-point register for `kFloat32`/`kFloat64`. `kUint32`, `kUint64` and `kVoid`
// are rejected with a fatal log.
// NOTE(review): no `break` statements are visible between the cases in this extract;
// they appear to have been lost -- confirm against the upstream file.
void InstructionCodeGeneratorRISCV64::Load(
Location out, XRegister rs1, int32_t offset, DataType::Type type) {
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
__ Loadbu(out.AsRegister<XRegister>(), rs1, offset);
case DataType::Type::kInt8:
__ Loadb(out.AsRegister<XRegister>(), rs1, offset);
case DataType::Type::kUint16:
__ Loadhu(out.AsRegister<XRegister>(), rs1, offset);
case DataType::Type::kInt16:
__ Loadh(out.AsRegister<XRegister>(), rs1, offset);
case DataType::Type::kInt32:
__ Loadw(out.AsRegister<XRegister>(), rs1, offset);
case DataType::Type::kInt64:
__ Loadd(out.AsRegister<XRegister>(), rs1, offset);
// References use `Loadwu`, unlike `kInt32` which uses `Loadw`.
case DataType::Type::kReference:
__ Loadwu(out.AsRegister<XRegister>(), rs1, offset);
case DataType::Type::kFloat32:
__ FLoadw(out.AsFpuRegister<FRegister>(), rs1, offset);
case DataType::Type::kFloat64:
__ FLoadd(out.AsFpuRegister<FRegister>(), rs1, offset);
case DataType::Type::kUint32:
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
// Emits a store of `value` of `type` to `rs1 + offset`. A constant `value` must be a
// zero bit pattern and is stored from the `Zero` register. `kUint32`, `kUint64` and
// `kVoid` are rejected with a fatal log.
// NOTE(review): `break`/`return` statements, block terminators and possibly other
// statements are not visible in this extract -- confirm against the upstream file.
void InstructionCodeGeneratorRISCV64::Store(
Location value, XRegister rs1, int32_t offset, DataType::Type type) {
DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant()));
// With heap poisoning, a non-constant reference is copied to a scratch register and
// stored from there, so that the input register is left unchanged.
if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) {
riscv64::ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
__ Mv(tmp, value.AsRegister<XRegister>());
__ Storew(tmp, rs1, offset);
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Storeb(InputXRegisterOrZero(value), rs1, offset);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Storeh(InputXRegisterOrZero(value), rs1, offset);
// A non-constant float is stored from its FP register.
case DataType::Type::kFloat32:
if (!value.IsConstant()) {
__ FStorew(value.AsFpuRegister<FRegister>(), rs1, offset);
case DataType::Type::kInt32:
case DataType::Type::kReference:
__ Storew(InputXRegisterOrZero(value), rs1, offset);
// A non-constant double is stored from its FP register.
case DataType::Type::kFloat64:
if (!value.IsConstant()) {
__ FStored(value.AsFpuRegister<FRegister>(), rs1, offset);
case DataType::Type::kInt64:
__ Stored(InputXRegisterOrZero(value), rs1, offset);
case DataType::Type::kUint32:
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
// Emits a sequentially consistent store of `value` of `type` to `rs1 + offset`.
// Types of 4 or more bytes are stored with an AMOSWAP whose result is discarded
// (destination `Zero`); smaller types use a plain `Store()`.
// NOTE(review): the bodies of the `instruction != nullptr` branches and any fence
// instructions for the small-type path are not visible in this extract -- confirm
// against the upstream file.
void InstructionCodeGeneratorRISCV64::StoreSeqCst(Location value,
XRegister rs1,
int32_t offset,
DataType::Type type,
HInstruction* instruction) {
if (DataType::Size(type) >= 4u) {
// Use AMOSWAP for 32-bit and 64-bit data types.
ScratchRegisterScope srs(GetAssembler());
// AMOSWAP takes its source from a core register; pick or materialize one.
XRegister swap_src = kNoXRegister;
if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) {
swap_src = srs.AllocateXRegister();
__ Mv(swap_src, value.AsRegister<XRegister>());
} else if (DataType::IsFloatingPointType(type) && !value.IsConstant()) {
// Move the floating-point value into a scratch core register.
swap_src = srs.AllocateXRegister();
FMvX(swap_src, value.AsFpuRegister<FRegister>(), type);
} else {
swap_src = InputXRegisterOrZero(value);
// The AMOSWAP calls below take no offset operand, so compute the address if needed.
XRegister addr = rs1;
if (offset != 0) {
addr = srs.AllocateXRegister();
__ AddConst64(addr, rs1, offset);
if (DataType::Is64BitType(type)) {
__ AmoSwapD(Zero, swap_src, addr, AqRl::kRelease);
} else {
__ AmoSwapW(Zero, swap_src, addr, AqRl::kRelease);
if (instruction != nullptr) {
} else {
// Use fences for smaller data types.
Store(value, rs1, offset, type);
if (instruction != nullptr) {
// Emits the ADD / SH1ADD / SH2ADD / SH3ADD variant that matches the size shift of `type`,
// with operands `(rd, rs1, rs2)` passed through unchanged.
// NOTE(review): presumably computes `rd = rs2 + (rs1 << SizeShift(type))` — confirm
// against the assembler's `ShNAdd` definitions.
void InstructionCodeGeneratorRISCV64::ShNAdd(
XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type) {
switch (type) {
// Size shift 0: plain ADD.
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
DCHECK_EQ(DataType::SizeShift(type), 0u);
__ Add(rd, rs1, rs2);
// Size shift 1.
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(DataType::SizeShift(type), 1u);
__ Sh1Add(rd, rs1, rs2);
// Size shift 2.
case DataType::Type::kInt32:
case DataType::Type::kReference:
case DataType::Type::kFloat32:
DCHECK_EQ(DataType::SizeShift(type), 2u);
__ Sh2Add(rd, rs1, rs2);
// Size shift 3.
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
DCHECK_EQ(DataType::SizeShift(type), 3u);
__ Sh3Add(rd, rs1, rs2);
// These types are not expected here and are reported as fatal.
case DataType::Type::kUint32:
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
// Returns the assembler of the owning code generator.
Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const {
return codegen_->GetAssembler();
// Emits the move at position `index` of `moves_` by delegating to
// `CodeGeneratorRISCV64::MoveLocation()` with the move's destination, source and type.
void ParallelMoveResolverRISCV64::EmitMove(size_t index) {
MoveOperands* move = moves_[index];
codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
// Emits a swap for the move at position `index` of `moves_` by delegating to
// `CodeGeneratorRISCV64::SwapLocations()` with the move's destination, source and type.
void ParallelMoveResolverRISCV64::EmitSwap(size_t index) {
MoveOperands* move = moves_[index];
codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
// Not implemented for RISCV64; `reg` is unused and the call logs a fatal "Unimplemented".
void ParallelMoveResolverRISCV64::SpillScratch([[maybe_unused]] int reg) {
LOG(FATAL) << "Unimplemented";
// Not implemented for RISCV64; `reg` is unused and the call logs a fatal "Unimplemented".
void ParallelMoveResolverRISCV64::RestoreScratch([[maybe_unused]] int reg) {
LOG(FATAL) << "Unimplemented";
// Swaps the contents of the two stack slots at offsets `index1` and `index2`.
// `double_slot` selects double stack slots (moved as 64-bit values) instead of
// single stack slots (moved as 32-bit values).
void ParallelMoveResolverRISCV64::Exchange(int index1, int index2, bool double_slot) {
// We have 2 scratch X registers and 1 scratch F register that we can use. We prefer
// to use X registers for the swap but if both offsets are too big, we need to reserve
// one of the X registers for address adjustment and use an F register.
bool use_fp_tmp2 = false;
if (!IsInt<12>(index2)) {
if (!IsInt<12>(index1)) {
use_fp_tmp2 = true;
} else {
// Only `index2` is out of the 12-bit range: swap the roles so that the offset
// used by the explicit `Sd()`/`Sw()` below fits.
std::swap(index1, index2);
DCHECK_IMPLIES(!IsInt<12>(index2), use_fp_tmp2);
Location loc1(double_slot ? Location::DoubleStackSlot(index1) : Location::StackSlot(index1));
Location loc2(double_slot ? Location::DoubleStackSlot(index2) : Location::StackSlot(index2));
riscv64::ScratchRegisterScope srs(GetAssembler());
// `tmp` always lives in an X register; `tmp2` is an F register only when both
// offsets are too big.
Location tmp = Location::RegisterLocation(srs.AllocateXRegister());
DataType::Type tmp_type = double_slot ? DataType::Type::kInt64 : DataType::Type::kInt32;
Location tmp2 = use_fp_tmp2
? Location::FpuRegisterLocation(srs.AllocateFRegister())
: Location::RegisterLocation(srs.AllocateXRegister());
DataType::Type tmp2_type = use_fp_tmp2
? (double_slot ? DataType::Type::kFloat64 : DataType::Type::kFloat32)
: tmp_type;
// Load both slots into the temporaries, then write them back crosswise.
codegen_->MoveLocation(tmp, loc1, tmp_type);
codegen_->MoveLocation(tmp2, loc2, tmp2_type);
if (use_fp_tmp2) {
codegen_->MoveLocation(loc2, tmp, tmp_type);
} else {
// We cannot use `Stored()` or `Storew()` via `MoveLocation()` because we have
// no more scratch registers available. Use `Sd()` or `Sw()` explicitly.
if (double_slot) {
__ Sd(tmp.AsRegister<XRegister>(), SP, index2);
} else {
__ Sw(tmp.AsRegister<XRegister>(), SP, index2);
srs.FreeXRegister(tmp.AsRegister<XRegister>()); // Free a temporary for `MoveLocation()`.
codegen_->MoveLocation(loc1, tmp2, tmp2_type);
// Constructs the instruction visitor: forwards `graph` and `codegen` to the base
// `InstructionCodeGenerator` and keeps the RISCV64 code generator in `codegen_`.
InstructionCodeGeneratorRISCV64::InstructionCodeGeneratorRISCV64(HGraph* graph,
CodeGeneratorRISCV64* codegen)
: InstructionCodeGenerator(graph, codegen),
codegen_(codegen) {}
// Emits a check of the status word of the class in `class_reg`: branches to the entry
// of `slow_path` when the status compares (unsigned) below the shifted
// `kVisiblyInitialized` value, and binds the slow path's exit label after the check.
void InstructionCodeGeneratorRISCV64::GenerateClassInitializationCheck(
SlowPathCodeRISCV64* slow_path, XRegister class_reg) {
ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
XRegister tmp2 = srs.AllocateXRegister();
// We shall load the full 32-bit status word with sign-extension and compare as unsigned
// to a sign-extended shifted status value. This yields the same comparison as loading and
// materializing unsigned but the constant is materialized with a single LUI instruction.
__ Loadw(tmp, class_reg, mirror::Class::StatusOffset().SizeValue()); // Sign-extended.
__ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>());
__ Bltu(tmp, tmp2, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// Not implemented for RISCV64; the call logs a fatal "Unimplemented".
void InstructionCodeGeneratorRISCV64::GenerateBitstringTypeCheckCompare(
HTypeCheckInstruction* instruction, XRegister temp) {
LOG(FATAL) << "Unimplemented";
// Emits a suspend check for `instruction`.
// Loads the thread flags from `TR`, isolates the suspend/checkpoint request bits and
// branches on them. With a null `successor` the slow path is entered when any bit is set
// and execution resumes at the slow path's return label; with a non-null `successor`
// the code jumps to `successor` when no bit is set and to the slow path otherwise.
void InstructionCodeGeneratorRISCV64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
// A no-op check emits at most the jump to `successor`.
if (instruction->IsNoOp()) {
if (successor != nullptr) {
__ J(codegen_->GetLabelOf(successor));
if (codegen_->CanUseImplicitSuspendCheck()) {
LOG(FATAL) << "Unimplemented ImplicitSuspendCheck";
// Create the slow path on first use; an existing one must target the same successor.
SuspendCheckSlowPathRISCV64* slow_path =
if (slow_path == nullptr) {
slow_path =
new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathRISCV64(instruction, successor);
if (successor != nullptr) {
} else {
DCHECK_EQ(slow_path->GetSuccessor(), successor);
ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
__ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value());
// The request flags must form a contiguous mask starting at bit 0 (value + 1 is a
// power of two) for the shift below to isolate exactly those bits.
static_assert(Thread::SuspendOrCheckpointRequestFlags() != std::numeric_limits<uint32_t>::max());
static_assert(IsPowerOfTwo(Thread::SuspendOrCheckpointRequestFlags() + 1u));
// Shift out other bits. Use an instruction that can be 16-bit with the "C" Standard Extension.
__ Slli(tmp, tmp, CLZ(static_cast<uint64_t>(Thread::SuspendOrCheckpointRequestFlags())));
if (successor == nullptr) {
__ Bnez(tmp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
} else {
__ Beqz(tmp, codegen_->GetLabelOf(successor));
__ J(slow_path->GetEntryLabel());
// slow_path will return to GetLabelOf(successor).
// Emits a reference load `out = *(out + offset)`, where `out` is both the base and the
// destination register. `read_barrier_option` selects between a plain load and a load
// with a read barrier; `maybe_temp` is used by the read barrier variants.
void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister(
HInstruction* instruction,
Location out,
uint32_t offset,
Location maybe_temp,
ReadBarrierOption read_barrier_option) {
XRegister out_reg = out.AsRegister<XRegister>();
if (read_barrier_option == kWithReadBarrier) {
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
/* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
__ Mv(maybe_temp.AsRegister<XRegister>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ Loadwu(out_reg, out_reg, offset);
codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
} else {
// Plain load with no read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
__ Loadwu(out_reg, out_reg, offset);
// Emits a reference load `out = *(obj + offset)` with separate base (`obj`) and
// destination (`out`) registers. `read_barrier_option` selects between a plain load
// and a load with a read barrier.
void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters(
HInstruction* instruction,
Location out,
Location obj,
uint32_t offset,
Location maybe_temp,
ReadBarrierOption read_barrier_option) {
XRegister out_reg = out.AsRegister<XRegister>();
XRegister obj_reg = obj.AsRegister<XRegister>();
if (read_barrier_option == kWithReadBarrier) {
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
/* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
__ Loadwu(out_reg, obj_reg, offset);
codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
} else {
// Plain load with no read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
__ Loadwu(out_reg, obj_reg, offset);
// Allocates a `ReadBarrierMarkSlowPathRISCV64` for `instruction`, `root` and `temp`
// from the scoped allocator and returns it.
SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddGcRootBakerBarrierBarrierSlowPath(
HInstruction* instruction, Location root, Location temp) {
SlowPathCodeRISCV64* slow_path =
new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(instruction, root, temp);
return slow_path;
// Emits the marking check for `root`: loads the read barrier mark entrypoint for
// `root` from the thread register into `temp`, branches to the entry of `slow_path`
// when it is non-zero, and binds the slow path's exit label after the branch.
void CodeGeneratorRISCV64::EmitBakerReadBarierMarkingCheck(
SlowPathCodeRISCV64* slow_path, Location root, Location temp) {
const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root);
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ Loadd(temp.AsRegister<XRegister>(), TR, entry_point_offset);
__ Bnez(temp.AsRegister<XRegister>(), slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// Emits a GC root load `root = *(obj + offset)`, optionally followed by a read barrier
// selected by `read_barrier_option`.
// When `label_low` is non-null it is bound immediately before the instruction that
// carries `offset`, and `offset` must be `kLinkTimeOffsetPlaceholderLow`.
void CodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
XRegister obj,
uint32_t offset,
ReadBarrierOption read_barrier_option,
Riscv64Label* label_low) {
DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset;
XRegister root_reg = root.AsRegister<XRegister>();
if (read_barrier_option == kWithReadBarrier) {
if (kUseBakerReadBarrier) {
// Note that we do not actually check the value of `GetIsGcMarking()`
// to decide whether to mark the loaded GC root or not. Instead, we
// load into `temp` (T6) the read barrier mark entry point corresponding
// to register `root`. If `temp` is null, it means that `GetIsGcMarking()`
// is false, and vice versa.
// GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
// temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
// if (temp != null) {
// root = temp(root)
// }
// TODO(riscv64): Introduce a "marking register" that holds the pointer to one of the
// register marking entrypoints if marking (null if not marking) and make sure that
// marking entrypoints for other registers are at known offsets, so that we can call
// them using the "marking register" plus the offset embedded in the JALR instruction.
if (label_low != nullptr) {
__ Bind(label_low);
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ Loadwu(root_reg, obj, offset);
sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
"art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
"have different sizes.");
static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
// Use RA as temp. It is clobbered in the slow path anyway.
Location temp = Location::RegisterLocation(RA);
SlowPathCodeRISCV64* slow_path =
AddGcRootBakerBarrierBarrierSlowPath(instruction, root, temp);
EmitBakerReadBarierMarkingCheck(slow_path, root, temp);
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
// /* GcRoot<mirror::Object>* */ root = obj + offset
if (label_low != nullptr) {
__ Bind(label_low);
__ AddConst32(root_reg, obj, offset);
// /* mirror::Object* */ root = root->Read()
GenerateReadBarrierForRootSlow(instruction, root, root);
} else {
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
if (label_low != nullptr) {
__ Bind(label_low);
__ Loadwu(root_reg, obj, offset);
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
// Emits the branch(es) for the condition found at input `condition_input_index` of
// `instruction`. A null `true_target` or `false_target` means that the corresponding
// outcome falls through; when both are null nothing is emitted.
void InstructionCodeGeneratorRISCV64::GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Riscv64Label* true_target,
Riscv64Label* false_target) {
HInstruction* cond = instruction->InputAt(condition_input_index);
if (true_target == nullptr && false_target == nullptr) {
// Nothing to do. The code always falls through.
} else if (cond->IsIntConstant()) {
// Constant condition, statically compared against "true" (integer value 1).
if (cond->AsIntConstant()->IsTrue()) {
if (true_target != nullptr) {
__ J(true_target);
} else {
DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
if (false_target != nullptr) {
__ J(false_target);
// The following code generates these patterns:
// (1) true_target == nullptr && false_target != nullptr
// - opposite condition true => branch to false_target
// (2) true_target != nullptr && false_target == nullptr
// - condition true => branch to true_target
// (3) true_target != nullptr && false_target != nullptr
// - condition true => branch to true_target
// - branch to false_target
if (IsBooleanValueOrMaterializedCondition(cond)) {
// The condition instruction has been materialized, compare the output to 0.
Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
if (true_target == nullptr) {
__ Beqz(cond_val.AsRegister<XRegister>(), false_target);
} else {
__ Bnez(cond_val.AsRegister<XRegister>(), true_target);
} else {
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
HCondition* condition = cond->AsCondition();
DataType::Type type = condition->InputAt(0)->GetType();
LocationSummary* locations = condition->GetLocations();
IfCondition if_cond = condition->GetCondition();
Riscv64Label* branch_target = true_target;
// With no true target, branch to `false_target` on the opposite condition instead.
if (true_target == nullptr) {
if_cond = condition->GetOppositeCondition();
branch_target = false_target;
switch (type) {
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
GenerateFpCondition(if_cond, condition->IsGtBias(), type, locations, branch_target);
// Integral types and reference equality.
GenerateIntLongCompareAndBranch(if_cond, locations, branch_target);
// If neither branch falls through (case 3), the conditional branch to `true_target`
// was already emitted (case 2) and we need to emit a jump to `false_target`.
if (true_target != nullptr && false_target != nullptr) {
__ J(false_target);
// Emits code for a Div or Rem whose second input is the constant 1 or -1.
// Rem yields zero; Div by -1 negates the dividend (SUBW for Int32, SUB for Int64);
// Div by 1 copies the dividend to the output if they are different registers.
void InstructionCodeGeneratorRISCV64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
DataType::Type type = instruction->GetResultType();
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
XRegister out = locations->Out().AsRegister<XRegister>();
XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
int64_t imm = Int64FromConstant(second.GetConstant());
DCHECK(imm == 1 || imm == -1);
if (instruction->IsRem()) {
__ Mv(out, Zero);
} else {
if (imm == -1) {
if (type == DataType::Type::kInt32) {
__ Subw(out, Zero, dividend);
} else {
DCHECK_EQ(type, DataType::Type::kInt64);
__ Sub(out, Zero, dividend);
} else if (out != dividend) {
__ Mv(out, dividend);
// Emits code for an Int32/Int64 Div or Rem whose second input is a constant with a
// power-of-two absolute value (other than +/-1), using shifts and adds.
void InstructionCodeGeneratorRISCV64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
DataType::Type type = instruction->GetResultType();
DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type;
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
XRegister out = locations->Out().AsRegister<XRegister>();
XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
int64_t imm = Int64FromConstant(second.GetConstant());
int64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
// `ctz_imm` is the shift amount, i.e. log2 of the divisor's absolute value.
int ctz_imm = CTZ(abs_imm);
DCHECK_GE(ctz_imm, 1); // Division by +/-1 is handled by `DivRemOneOrMinusOne()`.
ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
// Calculate the negative dividend adjustment `tmp = dividend < 0 ? abs_imm - 1 : 0`.
// This adjustment is needed for rounding the division result towards zero.
if (type == DataType::Type::kInt32 || ctz_imm == 1) {
// A 32-bit dividend is sign-extended to 64-bit, so we can use the upper bits.
// And for a 64-bit division by +/-2, we need just the sign bit.
DCHECK_IMPLIES(type == DataType::Type::kInt32, ctz_imm < 32);
__ Srli(tmp, dividend, 64 - ctz_imm);
} else {
// For other 64-bit divisions, we need to replicate the sign bit.
__ Srai(tmp, dividend, 63);
__ Srli(tmp, tmp, 64 - ctz_imm);
// The rest of the calculation can use 64-bit operations even for 32-bit div/rem.
__ Add(tmp, tmp, dividend);
if (instruction->IsDiv()) {
// Quotient: arithmetic shift of the adjusted dividend, negated for a negative divisor.
__ Srai(out, tmp, ctz_imm);
if (imm < 0) {
__ Neg(out, out);
} else {
// Remainder: clear the low `ctz_imm` bits of the adjusted dividend and subtract
// the result from the original dividend. ANDI is used only for `ctz_imm <= 11`;
// otherwise the mask is materialized in a second scratch register.
if (ctz_imm <= 11) {
__ Andi(tmp, tmp, -abs_imm);
} else {
ScratchRegisterScope srs2(GetAssembler());
XRegister tmp2 = srs2.AllocateXRegister();
__ Li(tmp2, -abs_imm);
__ And(tmp, tmp, tmp2);
__ Sub(out, dividend, tmp);
// Emits code for a Div or Rem by an arbitrary constant: the constant is loaded into a
// scratch register and a regular DIVW/DIV or REMW/REM (by result type) is emitted.
void InstructionCodeGeneratorRISCV64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
LocationSummary* locations = instruction->GetLocations();
XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
XRegister out = locations->Out().AsRegister<XRegister>();
Location second = locations->InAt(1);
int64_t imm = Int64FromConstant(second.GetConstant());
DataType::Type type = instruction->GetResultType();
ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
// TODO: optimize with constant.
__ LoadConst64(tmp, imm);
if (instruction->IsDiv()) {
if (type == DataType::Type::kInt32) {
__ Divw(out, dividend, tmp);
} else {
__ Div(out, dividend, tmp);
} else {
if (type == DataType::Type::kInt32) {
__ Remw(out, dividend, tmp);
} else {
__ Rem(out, dividend, tmp);
// Emits code for an Int32/Int64 Div or Rem.
// A constant second input is classified by value (0, +/-1, power of two, other);
// a register second input gets a DIVW/DIV or REMW/REM chosen by the result type.
void InstructionCodeGeneratorRISCV64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
DataType::Type type = instruction->GetResultType();
DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type;
LocationSummary* locations = instruction->GetLocations();
XRegister out = locations->Out().AsRegister<XRegister>();
Location second = locations->InAt(1);
if (second.IsConstant()) {
int64_t imm = Int64FromConstant(second.GetConstant());
if (imm == 0) {
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
} else if (IsPowerOfTwo(AbsOrMin(imm))) {
} else {
DCHECK(imm <= -2 || imm >= 2);
} else {
// Register divisor: emit the hardware divide/remainder instruction.
XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
XRegister divisor = second.AsRegister<XRegister>();
if (instruction->IsDiv()) {
if (type == DataType::Type::kInt32) {
__ Divw(out, dividend, divisor);
} else {
__ Div(out, dividend, divisor);
} else {
if (type == DataType::Type::kInt32) {
__ Remw(out, dividend, divisor);
} else {
__ Rem(out, dividend, divisor);
// Convenience overload: materializes the condition into the output register of
// `locations`, without expanding the result to all bits.
void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond,
LocationSummary* locations) {
XRegister rd = locations->Out().AsRegister<XRegister>();
GenerateIntLongCondition(cond, locations, rd, /*to_all_bits=*/ false);
// Materializes the integer condition `cond` over the two inputs of `locations` into `rd`.
// The second input may be a register or a constant (used as an immediate).
// The comparison first produces 0/1 in `rd`; `reverse_condition` records that the
// emitted comparison is the negation of `cond`. The final step either inverts that
// value with XORI, or, when `to_all_bits` is set, converts it to 0/-1.
void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond,
LocationSummary* locations,
XRegister rd,
bool to_all_bits) {
XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
Location rs2_location = locations->InAt(1);
bool use_imm = rs2_location.IsConstant();
int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0;
XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>();
bool reverse_condition = false;
switch (cond) {
case kCondEQ:
case kCondNE:
// Compute the difference (or use `rs1` directly for a zero immediate) and test it
// against zero with SEQZ/SNEZ.
if (!use_imm) {
__ Sub(rd, rs1, rs2); // SUB is OK here even for 32-bit comparison.
} else if (imm != 0) {
__ Addi(rd, rs1, -imm); // ADDI is OK here even for 32-bit comparison.
} // else test `rs1` directly without subtraction for `use_imm && imm == 0`.
if (cond == kCondEQ) {
__ Seqz(rd, (use_imm && imm == 0) ? rs1 : rd);
} else {
__ Snez(rd, (use_imm && imm == 0) ? rs1 : rd);
case kCondLT:
case kCondGE:
if (use_imm) {
__ Slti(rd, rs1, imm);
} else {
__ Slt(rd, rs1, rs2);
// Calculate `rs1 >= rhs` as `!(rs1 < rhs)` since there's only the SLT but no SGE.
reverse_condition = (cond == kCondGE);
case kCondLE:
case kCondGT:
if (use_imm) {
// Calculate `rs1 <= imm` as `rs1 < imm + 1`.
DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check.
__ Slti(rd, rs1, imm + 1);
} else {
__ Slt(rd, rs2, rs1);
// Calculate `rs1 > imm` as `!(rs1 < imm + 1)` and calculate
// `rs1 <= rs2` as `!(rs2 < rs1)` since there's only the SLT but no SGE.
reverse_condition = ((cond == kCondGT) == use_imm);
case kCondB:
case kCondAE:
if (use_imm) {
// Sltiu sign-extends its 12-bit immediate operand before the comparison
// and thus lets us compare directly with unsigned values in the ranges
// [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff].
__ Sltiu(rd, rs1, imm);
} else {
__ Sltu(rd, rs1, rs2);
// Calculate `rs1 AE rhs` as `!(rs1 B rhs)` since there's only the SLTU but no SGEU.
reverse_condition = (cond == kCondAE);
case kCondBE:
case kCondA:
if (use_imm) {
// Calculate `rs1 BE imm` as `rs1 B imm + 1`.
// Sltiu sign-extends its 12-bit immediate operand before the comparison
// and thus lets us compare directly with unsigned values in the ranges
// [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff].
DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check.
__ Sltiu(rd, rs1, imm + 1);
} else {
__ Sltu(rd, rs2, rs1);
// Calculate `rs1 A imm` as `!(rs1 B imm + 1)` and calculate
// `rs1 BE rs2` as `!(rs2 B rs1)` since there's only the SLTU but no SGEU.
reverse_condition = ((cond == kCondA) == use_imm);
if (to_all_bits) {
// Store the result to all bits; in other words, "true" is represented by -1.
if (reverse_condition) {
__ Addi(rd, rd, -1); // 0 -> -1, 1 -> 0
} else {
__ Neg(rd, rd); // 0 -> 0, 1 -> -1
} else {
// Plain 0/1 result: only a reversed comparison needs fixing up.
if (reverse_condition) {
__ Xori(rd, rd, 1);
// Emits a compare-and-branch to `label` for the integer condition `cond` over the two
// inputs of `locations`. A constant second input must be zero (see the DCHECK) and is
// handled with the compare-against-zero branches; otherwise both inputs are registers.
void InstructionCodeGeneratorRISCV64::GenerateIntLongCompareAndBranch(IfCondition cond,
LocationSummary* locations,
Riscv64Label* label) {
XRegister left = locations->InAt(0).AsRegister<XRegister>();
Location right_location = locations->InAt(1);
if (right_location.IsConstant()) {
DCHECK_EQ(CodeGenerator::GetInt64ValueOf(right_location.GetConstant()), 0);
switch (cond) {
case kCondEQ:
case kCondBE: // <= 0 if zero
__ Beqz(left, label);
case kCondNE:
case kCondA: // > 0 if non-zero
__ Bnez(left, label);
case kCondLT:
__ Bltz(left, label);
case kCondGE:
__ Bgez(left, label);
case kCondLE:
__ Blez(left, label);
case kCondGT:
__ Bgtz(left, label);
case kCondB: // always false
case kCondAE: // always true
__ J(label);
} else {
// Register-register comparison: one branch instruction per condition.
XRegister right_reg = right_location.AsRegister<XRegister>();
switch (cond) {
case kCondEQ:
__ Beq(left, right_reg, label);
case kCondNE:
__ Bne(left, right_reg, label);
case kCondLT:
__ Blt(left, right_reg, label);
case kCondGE:
__ Bge(left, right_reg, label);
case kCondLE:
__ Ble(left, right_reg, label);
case kCondGT:
__ Bgt(left, right_reg, label);
case kCondB:
__ Bltu(left, right_reg, label);
case kCondAE:
__ Bgeu(left, right_reg, label);
case kCondBE:
__ Bleu(left, right_reg, label);
case kCondA:
__ Bgtu(left, right_reg, label);
// Convenience overload: picks the result register and forwards to the full
// `GenerateFpCondition()`. With a non-null `label` (branching form) the output location
// must be invalid and a scratch register holds the comparison result; with a null
// `label` the result goes to the output register of `locations`.
void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond,
bool gt_bias,
DataType::Type type,
LocationSummary* locations,
Riscv64Label* label) {
DCHECK_EQ(label != nullptr, locations->Out().IsInvalid());
ScratchRegisterScope srs(GetAssembler());
XRegister rd =
(label != nullptr) ? srs.AllocateXRegister() : locations->Out().AsRegister<XRegister>();
GenerateFpCondition(cond, gt_bias, type, locations, label, rd, /*to_all_bits=*/ false);
// Emits a floating-point condition over the two FP inputs of `locations`.
// The FP compare result (0/1) is written to `rd`; `reverse_condition` records that the
// emitted compare is the negation of the requested condition. Then, depending on the
// arguments: with a non-null `label`, branch to it (BEQZ when reversed, BNEZ otherwise);
// else with `to_all_bits`, convert `rd` to 0/-1; else leave 0/1 in `rd`, inverting it
// with XORI when reversed.
void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond,
bool gt_bias,
DataType::Type type,
LocationSummary* locations,
Riscv64Label* label,
XRegister rd,
bool to_all_bits) {
// RISC-V FP compare instructions yield the following values:
// l<r l=r l>r Unordered
// FEQ l,r 0 1 0 0
// FLT l,r 1 0 0 0
// FLT r,l 0 0 1 0
// FLE l,r 1 1 0 0
// FLE r,l 0 1 1 0
// We can calculate the `Compare` results using the following formulas:
// l<r l=r l>r Unordered
// Compare/gt_bias -1 0 1 1 = ((FLE l,r) ^ 1) - (FLT l,r)
// Compare/lt_bias -1 0 1 -1 = ((FLE r,l) - 1) + (FLT r,l)
// These are emitted in `VisitCompare()`.
// This function emits a fused `Condition(Compare(., .), 0)`. If we compare the
// `Compare` results above with 0, we get the following values and formulas:
// l<r l=r l>r Unordered
// CondEQ/- 0 1 0 0 = (FEQ l, r)
// CondNE/- 1 0 1 1 = (FEQ l, r) ^ 1
// CondLT/gt_bias 1 0 0 0 = (FLT l,r)
// CondLT/lt_bias 1 0 0 1 = (FLE r,l) ^ 1
// CondLE/gt_bias 1 1 0 0 = (FLE l,r)
// CondLE/lt_bias 1 1 0 1 = (FLT r,l) ^ 1
// CondGT/gt_bias 0 0 1 1 = (FLE l,r) ^ 1
// CondGT/lt_bias 0 0 1 0 = (FLT r,l)
// CondGE/gt_bias 0 1 1 1 = (FLT l,r) ^ 1
// CondGE/lt_bias 0 1 1 0 = (FLE r,l)
// (CondEQ/CondNE comparison with zero yields the same result with gt_bias and lt_bias.)
// If the condition is not materialized, the `^ 1` is not emitted,
// instead the condition is reversed by emitting BEQZ instead of BNEZ.
FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>();
FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
bool reverse_condition = false;
// Each case below emits the compare from the table above; `reverse_condition` stands
// for the trailing `^ 1` in the table.
switch (cond) {
case kCondEQ:
FEq(rd, rs1, rs2, type);
case kCondNE:
FEq(rd, rs1, rs2, type);
reverse_condition = true;
case kCondLT:
if (gt_bias) {
FLt(rd, rs1, rs2, type);
} else {
FLe(rd, rs2, rs1, type);
reverse_condition = true;
case kCondLE:
if (gt_bias) {
FLe(rd, rs1, rs2, type);
} else {
FLt(rd, rs2, rs1, type);
reverse_condition = true;
case kCondGT:
if (gt_bias) {
FLe(rd, rs1, rs2, type);
reverse_condition = true;
} else {
FLt(rd, rs2, rs1, type);
case kCondGE:
if (gt_bias) {
FLt(rd, rs1, rs2, type);
reverse_condition = true;
} else {
FLe(rd, rs2, rs1, type);
LOG(FATAL) << "Unexpected floating-point condition " << cond;
if (label != nullptr) {
// Branching form: fold the reversal into the choice of branch instruction.
if (reverse_condition) {
__ Beqz(rd, label);
} else {
__ Bnez(rd, label);
} else if (to_all_bits) {
// Store the result to all bits; in other words, "true" is represented by -1.
if (reverse_condition) {
__ Addi(rd, rd, -1); // 0 -> -1, 1 -> 0
} else {
__ Neg(rd, rd); // 0 -> 0, 1 -> -1
} else {
if (reverse_condition) {
__ Xori(rd, rd, 1);
// Field variant of the Baker read barrier reference load: forwards its arguments with
// no index (`Location::NoLocation()`).
// NOTE(review): the callee name is not visible in this view; presumably
// `GenerateReferenceLoadWithBakerReadBarrier()` — confirm.
void CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
XRegister obj,
uint32_t offset,
Location temp,
bool needs_null_check) {
instruction, ref, obj, offset, /*index=*/ Location::NoLocation(), temp, needs_null_check);
// Array variant of the Baker read barrier reference load: forwards its arguments,
// passing `data_offset` as the offset together with `index`.
// NOTE(review): the callee name is not visible in this view; presumably
// `GenerateReferenceLoadWithBakerReadBarrier()` — confirm.
void CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
XRegister obj,
uint32_t data_offset,
Location index,
Location temp,
bool needs_null_check) {
instruction, ref, obj, data_offset, index, temp, needs_null_check);
// Emits a reference load into `ref` followed by a Baker read barrier marking check.
// With a valid `index` the address is `obj + index (scaled for array gets) + offset`;
// otherwise it is `obj + offset`. After the load, the mark entrypoint for `ref` is
// loaded from the thread register into RA and the slow path is entered if it is non-zero.
void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
XRegister obj,
uint32_t offset,
Location index,
Location temp,
bool needs_null_check) {
// For now, use the same approach as for GC roots plus unpoison the reference if needed.
// TODO(riscv64): Implement checking if the holder is black.
XRegister reg = ref.AsRegister<XRegister>();
if (index.IsValid()) {
DataType::Type type = DataType::Type::kReference;
DCHECK_EQ(type, instruction->GetType());
if (instruction->IsArrayGet()) {
// /* HeapReference<Object> */ ref = *(obj + index * element_size + offset)
instruction_visitor_.ShNAdd(reg, index.AsRegister<XRegister>(), obj, type);
} else {
// /* HeapReference<Object> */ ref = *(obj + index + offset)
__ Add(reg, index.AsRegister<XRegister>(), obj);
__ Loadwu(reg, reg, offset);
} else {
// /* HeapReference<Object> */ ref = *(obj + offset)
__ Loadwu(reg, obj, offset);
if (needs_null_check) {
// Slow path marking the reference.
XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway.
SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(
instruction, ref, Location::RegisterLocation(tmp));
const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(ref);
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ Loadd(tmp, TR, entry_point_offset);
__ Bnez(tmp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// Not implemented for RISCV64; the call logs a fatal "Unimplemented".
SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddReadBarrierSlowPath(HInstruction* instruction,
Location out,
Location ref,
Location obj,
uint32_t offset,
Location index) {
LOG(FATAL) << "Unimplemented";
// Not implemented for RISCV64; the call logs a fatal "Unimplemented".
void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction,
Location out,
Location ref,
Location obj,
uint32_t offset,
Location index) {
LOG(FATAL) << "Unimplemented";
// Emits a slow path based read barrier via `GenerateReadBarrierSlow()` when
// `EmitReadBarrier()` is true; otherwise takes the heap poisoning branch when
// `kPoisonHeapReferences` is set.
void CodeGeneratorRISCV64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location out,
Location ref,
Location obj,
uint32_t offset,
Location index) {
if (EmitReadBarrier()) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier).
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
// Emits an unconditional jump into a newly allocated `ReadBarrierForRootSlowPathRISCV64`
// for `instruction`, `out` and `root`, and binds the slow path's exit label right after.
void CodeGeneratorRISCV64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
// Insert a slow path based read barrier *after* the GC root load.
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCodeRISCV64* slow_path =
new (GetScopedAllocator()) ReadBarrierForRootSlowPathRISCV64(instruction, out, root);
__ J(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// Emits the control transfer from the block of `instruction` to `successor`.
// Nothing is emitted for the exit block. On a loop back edge with a suspend check, the
// hotness update and the suspend check (which also emits the jump) are generated.
// Otherwise a jump is emitted unless `successor` is the next block.
void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction,
HBasicBlock* successor) {
if (successor->IsExitBlock()) {
return; // no code needed
HBasicBlock* block = instruction->GetBlock();
HInstruction* previous = instruction->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /*is_frame_entry=*/ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return; // `GenerateSuspendCheck()` emitted the jump.
// In the entry block, a suspend check directly preceding the goto is generated here.
if (block->IsEntryBlock() && previous != nullptr && previous->IsSuspendCheck()) {
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
if (!codegen_->GoesToNextBlock(block, successor)) {
__ J(codegen_->GetLabelOf(successor));
// Emits a packed switch as a chain of compare-and-branch instructions over the
// successors of `switch_block`, handling two entries per loop iteration and finishing
// with an unconditional jump to the last entry.
void InstructionCodeGeneratorRISCV64::GenPackedSwitchWithCompares(XRegister adjusted,
XRegister temp,
uint32_t num_entries,
HBasicBlock* switch_block) {
// Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0,
// `adjusted` is the original `value` register and we must not clobber it. Otherwise,
// `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check.
// Create a set of compare/jumps.
ArrayRef<HBasicBlock* const> successors(switch_block->GetSuccessors());
uint32_t index = 0;
for (; num_entries - index >= 2u; index += 2u) {
// Jump to `successors[index]` if `value == lower_bound + index`.
// Note that `adjusted` holds `value - lower_bound - index`.
__ Beqz(adjusted, codegen_->GetLabelOf(successors[index]));
if (num_entries - index == 2u) {
break; // The last entry shall match, so the branch shall be unconditional.
// Jump to `successors[index + 1]` if `value == lower_bound + index + 1`.
// Modify `adjusted` to hold `value - lower_bound - index - 2` for this comparison.
__ Addi(temp, adjusted, -2);
// From here on the running value lives in `temp`, so the original register is kept intact.
adjusted = temp;
__ Bltz(adjusted, codegen_->GetLabelOf(successors[index + 1]));
// For the last entry, unconditionally jump to `successors[num_entries - 1]`.
__ J(codegen_->GetLabelOf(successors[num_entries - 1u]));
void InstructionCodeGeneratorRISCV64::GenTableBasedPackedSwitch(XRegister adjusted,
                                                                XRegister temp,
                                                                uint32_t num_entries,
                                                                HBasicBlock* switch_block) {
  // On entry `adjusted` contains `value - lower_bound`. For a zero `lower_bound` that is the
  // original `value` register, which must be preserved; for any other bound it is already
  // `temp`. The range check `adjusted < num_entries` has been emitted by the caller.

  // Collect one label per case and hand them to the assembler as a jump table.
  ArenaVector<Riscv64Label*> case_labels(num_entries,
                                         __ GetAllocator()->Adapter(kArenaAllocSwitchTable));
  const ArenaVector<HBasicBlock*>& targets = switch_block->GetSuccessors();
  for (uint32_t entry = 0u; entry != num_entries; ++entry) {
    case_labels[entry] = codegen_->GetLabelOf(targets[entry]);
  }
  JumpTable* jump_table = __ CreateJumpTable(std::move(case_labels));

  // Materialize the jump table address in a scratch register.
  // Note: `LoadLabelAddress()` emits AUIPC+ADD. The ADD could be avoided by folding its
  // offset into the LW below and into every jump table entry, but that would require
  // invasive changes to the assembler's jump table handling.
  ScratchRegisterScope scratch_scope(GetAssembler());
  XRegister table_addr = scratch_scope.AllocateXRegister();
  __ LoadLabelAddress(table_addr, jump_table->GetLabel());

  // Fetch the 32-bit table entry at index `adjusted`; only `temp` is written.
  // TODO(riscv64): Use SH2ADD from the Zba extension.
  __ Slli(temp, adjusted, 2);
  __ Add(temp, temp, table_addr);
  __ Lw(temp, temp, 0);

  // Entries are offsets relative to the start of the table, so adding the table
  // address yields the absolute target.
  __ Add(temp, temp, table_addr);
  // Transfer control to the selected case.
  __ Jr(temp);
}
// Placeholder (judging by the name, for computing the address of a vector memory access).
// The only statement visible here aborts with a fatal "Unimplemented" log message, and none
// of the parameters is used in the lines shown. `adjusted_base` is annotated as an output.
int32_t InstructionCodeGeneratorRISCV64::VecAddress(LocationSummary* locations,
size_t size,
/*out*/ XRegister* adjusted_base) {
LOG(FATAL) << "Unimplemented";
// Sets up the input/output locations for a two-input arithmetic or logic operation.
//
// Integer (kInt32/kInt64) operations take the left input in a core register. The right
// input stays a constant when it can be encoded directly:
//   - Min/Max: only a zero constant (the code generator substitutes the `Zero` register);
//   - all other operations: a constant that fits a signed 12-bit immediate, negated first
//     for Sub because subtraction is emitted as an add of the negated immediate.
// Otherwise the right input needs a register.
//
// Floating-point operations take both inputs in FPU registers. FP Min/Max request an
// overlapping output because the generated sequence writes the output register before it
// has finished reading both inputs.
void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
  DCHECK_EQ(instruction->InputCount(), 2u);
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DataType::Type type = instruction->GetResultType();
  switch (type) {
    case DataType::Type::kInt32:
    case DataType::Type::kInt64: {
      locations->SetInAt(0, Location::RequiresRegister());
      HInstruction* right = instruction->InputAt(1);
      bool can_use_imm = false;
      if (instruction->IsMin() || instruction->IsMax()) {
        // Test the right-hand input, not the Min/Max node itself: the node is never a
        // constant, so checking it would always force the right input into a register and
        // the code generator's `Zero` register path would never be used.
        can_use_imm = IsZeroBitPattern(right);
      } else if (right->IsConstant()) {
        int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
        can_use_imm = IsInt<12>(instruction->IsSub() ? -imm : imm);
      }
      if (can_use_imm) {
        locations->SetInAt(1, Location::ConstantLocation(right));
      } else {
        locations->SetInAt(1, Location::RequiresRegister());
      }
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }

    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      if (instruction->IsMin() || instruction->IsMax()) {
        locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
      } else {
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;

    default:
      LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type;
  }
}
// Emits the instruction(s) for a binary operation, using the locations recorded in the
// instruction's `LocationSummary`.
// Integer types: And/Or/Xor/Add/Sub/Min, with anything else handled as Max.
// Floating-point types: Add/Sub, with anything else debug-checked to be Min or Max.
void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
DataType::Type type = instruction->GetType();
LocationSummary* locations = instruction->GetLocations();
switch (type) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
XRegister rd = locations->Out().AsRegister<XRegister>();
XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
Location rs2_location = locations->InAt(1);
// A constant right-hand input is emitted as an immediate: `rs2` is then `kNoXRegister`
// and `imm` carries the value. For a register input `imm` is 0.
bool use_imm = rs2_location.IsConstant();
XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>();
int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0;
if (instruction->IsAnd()) {
if (use_imm) {
__ Andi(rd, rs1, imm);
} else {
__ And(rd, rs1, rs2);
} else if (instruction->IsOr()) {
if (use_imm) {
__ Ori(rd, rs1, imm);
} else {
__ Or(rd, rs1, rs2);
} else if (instruction->IsXor()) {
if (use_imm) {
__ Xori(rd, rs1, imm);
} else {
__ Xor(rd, rs1, rs2);
} else if (instruction->IsAdd() || instruction->IsSub()) {
// kInt32 uses the W-suffixed forms (ADDIW/ADDW/SUBW), kInt64 the plain ones.
// Subtracting a constant is emitted as adding the negated constant.
if (type == DataType::Type::kInt32) {
if (use_imm) {
__ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm);
} else if (instruction->IsAdd()) {
__ Addw(rd, rs1, rs2);
} else {
__ Subw(rd, rs1, rs2);
} else {
if (use_imm) {
__ Addi(rd, rs1, instruction->IsSub() ? -imm : imm);
} else if (instruction->IsAdd()) {
__ Add(rd, rs1, rs2);
} else {
__ Sub(rd, rs1, rs2);
} else if (instruction->IsMin()) {
// The only constant accepted for Min/Max is zero; it is supplied via the `Zero` register.
DCHECK_IMPLIES(use_imm, imm == 0);
__ Min(rd, rs1, use_imm ? Zero : rs2);
} else {
DCHECK_IMPLIES(use_imm, imm == 0);
__ Max(rd, rs1, use_imm ? Zero : rs2);
case DataType::Type::kFloat32:
case DataType::Type::kFloat64: {
FRegister rd = locations->Out().AsFpuRegister<FRegister>();
FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>();
FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
if (instruction->IsAdd()) {
FAdd(rd, rs1, rs2, type);
} else if (instruction->IsSub()) {
FSub(rd, rs1, rs2, type);
} else {
DCHECK(instruction->IsMin() || instruction->IsMax());
// If one of the operands is NaN and the other is not, riscv64 instructions FMIN/FMAX
// return the other operand while we want to return the NaN operand.
DCHECK_NE(rd, rs1); // Requested `Location::kOutputOverlap`.
DCHECK_NE(rd, rs2); // Requested `Location::kOutputOverlap`.
ScratchRegisterScope srs(GetAssembler());
XRegister tmp = srs.AllocateXRegister();
XRegister tmp2 = srs.AllocateXRegister();
Riscv64Label done;
// Return `rs1` if it's NaN.
// `rd` is loaded with `rs1` before the branch so that, when the branch to `done` is
// taken, `rd` already holds the NaN operand. `tmp2` keeps the NaN threshold for both tests.
FClass(tmp, rs1, type);
__ Li(tmp2, kFClassNaNMinValue);
FMv(rd, rs1, type);
__ Bgeu(tmp, tmp2, &done);
// Return `rs2` if it's NaN.
FClass(tmp, rs2, type);
FMv(rd, rs2, type);
__ Bgeu(tmp, tmp2, &done);
// Calculate Min/Max for non-NaN arguments.
if (instruction->IsMin()) {
FMin(rd, rs1, rs2, type);
} else {
FMax(rd, rs1, rs2, type);
__ Bind(&done);
// NOTE(review): presumably reached only for types other than the four handled above;
// the case label for this statement is not visible here — confirm.
LOG(FATAL) << "Unexpected binary operation type " << type;
// Sets up the input/output locations for a condition.
// Floating-point inputs both require FPU registers. For every other input type the left
// input requires a core register, and the right input is either kept as a constant (when
// it can be embedded, see below) or requires a register.
void LocationsBuilderRISCV64::HandleCondition(HCondition* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
switch (instruction->InputAt(0)->GetType()) {
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
default: {
locations->SetInAt(0, Location::RequiresRegister());
HInstruction* rhs = instruction->InputAt(1);
bool use_imm = false;
if (rhs->IsConstant()) {
int64_t imm = CodeGenerator::GetInt64ValueOf(rhs->AsConstant());
if (instruction->IsEmittedAtUseSite()) {
// For `HIf`, materialize all non-zero constants with an `HParallelMove`.
// Note: For certain constants and conditions, the code could be improved.
// For example, 2048 takes two instructions to materialize but the negative
// -2048 could be embedded in ADDI for EQ/NE comparison.
use_imm = (imm == 0);
} else {
// Constants that cannot be embedded in an instruction's 12-bit immediate shall be
// materialized with an `HParallelMove`. This simplifies the code and avoids cases
// with arithmetic overflow. Adjust the `imm` if needed for a particular instruction.
// NOTE(review): no `break` is visible between the two case groups below in this view;
// confirm against the full file that EQ/NE does not fall through into `imm += 1`.
switch (instruction->GetCondition()) {
case kCondEQ:
case kCondNE:
imm = -imm; // ADDI with negative immediate (there is no SUBI).
case kCondLE:
case kCondGT:
case kCondBE:
case kCondA:
imm += 1; // SLTI/SLTIU with adjusted immediate (there is no SLEI/SLEIU).
// The constant is embedded only if the adjusted value fits a signed 12-bit immediate.
use_imm = IsInt<12>(imm);
if (use_imm) {
locations->SetInAt(1, Location::ConstantLocation(rhs));
} else {
locations->SetInAt(1, Location::RequiresRegister());
// An output register is requested only when the condition is not emitted at its use site.
if (!instruction->IsEmittedAtUseSite()) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
// Generates code for a condition, choosing the floating-point or the integer/reference
// comparison helper from the type of the first input.
void InstructionCodeGeneratorRISCV64::HandleCondition(HCondition* instruction) {
// NOTE(review): the body of this check is not visible here; presumably nothing is generated
// for a condition that is emitted at its use site — confirm.
if (instruction->IsEmittedAtUseSite()) {
DataType::Type type = instruction->InputAt(0)->GetType();
LocationSummary* locations = instruction->GetLocations();
switch (type) {
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
// The gt-bias flag is forwarded to the floating-point helper along with the condition.
GenerateFpCondition(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
// Integral types and reference equality.
GenerateIntLongCondition(instruction->GetCondition(), locations);
// Sets up the input/output locations for a shift or rotate of type kInt32 or kInt64.
// Any other result type is a fatal error.
void LocationsBuilderRISCV64::HandleShift(HBinaryOperation* instruction) {
// NOTE(review): the final operand of this assertion is not visible in this view.
DCHECK(instruction->IsShl() ||
instruction->IsShr() ||
instruction->IsUShr() ||
instruction->IsRol() ||
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DataType::Type type = instruction->GetResultType();
switch (type) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
// The value being shifted needs a register; the distance may be a register or a constant.
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
LOG(FATAL) << "Unexpected shift type " << type;
// Emits a shift or rotate for kInt32/kInt64. A constant distance uses the immediate
// instruction forms, a register distance uses the register forms. kInt32 uses the
// W-suffixed instructions, kInt64 the plain ones.
void InstructionCodeGeneratorRISCV64::HandleShift(HBinaryOperation* instruction) {
// NOTE(review): the final operand of this assertion is not visible in this view.
DCHECK(instruction->IsShl() ||
instruction->IsShr() ||
instruction->IsUShr() ||
instruction->IsRol() ||
LocationSummary* locations = instruction->GetLocations();
DataType::Type type = instruction->GetType();
switch (type) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
XRegister rd = locations->Out().AsRegister<XRegister>();
XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
Location rs2_location = locations->InAt(1);
if (rs2_location.IsConstant()) {
int64_t imm = CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant());
// A constant rotate-left is emitted as a rotate-right (RORI/RORIW below) by the
// negated distance.
if (instruction->IsRol()) {
imm = -imm;
// Only the low bits of the distance are kept, using the mask for the operand width.
uint32_t shamt =
imm & (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
// A zero distance needs at most a register move.
if (shamt == 0) {
if (rd != rs1) {
__ Mv(rd, rs1);
} else if (type == DataType::Type::kInt32) {
if (instruction->IsShl()) {
__ Slliw(rd, rs1, shamt);
} else if (instruction->IsShr()) {
__ Sraiw(rd, rs1, shamt);
} else if (instruction->IsUShr()) {
__ Srliw(rd, rs1, shamt);
} else if (instruction->IsRol()) {
// `shamt` was derived from the negated distance above.
__ Roriw(rd, rs1, shamt);
} else {
__ Roriw(rd, rs1, shamt);
} else {
if (instruction->IsShl()) {
__ Slli(rd, rs1, shamt);
} else if (instruction->IsShr()) {
__ Srai(rd, rs1, shamt);
} else if (instruction->IsUShr()) {
__ Srli(rd, rs1, shamt);
} else if (instruction->IsRol()) {
// `shamt` was derived from the negated distance above.
__ Rori(rd, rs1, shamt);
} else {
__ Rori(rd, rs1, shamt);
} else {
// Register distance: rotate-left has its own instruction (ROLW/ROL) here.
XRegister rs2 = rs2_location.AsRegister<XRegister>();
if (type == DataType::Type::kInt32) {
if (instruction->IsShl()) {
__ Sllw(rd, rs1, rs2);
} else if (instruction->IsShr()) {
__ Sraw(rd, rs1, rs2);
} else if (instruction->IsUShr()) {
__ Srlw(rd, rs1, rs2);
} else if (instruction->IsRol()) {
__ Rolw(rd, rs1, rs2);
} else {
__ Rorw(rd, rs1, rs2);
} else {
if (instruction->IsShl()) {
__ Sll(rd, rs1, rs2);
} else if (instruction->IsShr()) {
__ Sra(rd, rs1, rs2);
} else if (instruction->IsUShr()) {
__ Srl(rd, rs1, rs2);
} else if (instruction->IsRol()) {
__ Rol(rd, rs1, rs2);
} else {
__ Ror(rd, rs1, rs2);
LOG(FATAL) << "Unexpected shift operation type " << type;
// Conditional wrapper for GC card marking of `object`. When `value_can_be_null` is set,
// a branch to `done` is emitted that is taken if `value` is zero at run time.
// NOTE(review): the code emitted between the branch and the bind is not visible in this
// view; presumably it marks the card for `object` — confirm.
void CodeGeneratorRISCV64::MaybeMarkGCCard(XRegister object,
XRegister value,
bool value_can_be_null) {
Riscv64Label done;
if (value_can_be_null) {
__ Beqz(value, &done);
__ Bind(&done);
void CodeGeneratorRISCV64::MarkGCCard(XRegister object) {
  ScratchRegisterScope scratch_scope(GetAssembler());
  XRegister card_table = scratch_scope.AllocateXRegister();
  XRegister card_addr = scratch_scope.AllocateXRegister();
  // Fetch the card table address from the current thread.
  __ Loadd(card_table, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value());
  // The card for `object` lives at `card_table + (object >> kCardShift)`.
  __ Srli(card_addr, object, gc::accounting::CardTable::kCardShift);
  __ Add(card_addr, card_table, card_addr);
  // Dirty the card by storing `art::gc::accounting::CardTable::kCardDirty` into it.
  //
  // No separate register is loaded with `kCardDirty`. When the card table is created
  // its base is biased so that the least-significant byte of its address equals
  // `kCardDirty` (see art::gc::accounting::CardTable::Create). The SB below therefore
  // stores exactly that byte when it stores the low byte of `card_table`.
  //
  // `card_table` thus serves twice: as the base for locating the card, and as the
  // source of the value to write, which saves loading an immediate.
  __ Sb(card_table, card_addr, 0);  // No scratch register left for `Storeb()`.
}
void CodeGeneratorRISCV64::CheckGCCardIsValid(XRegister object) {
  Riscv64Label card_ok;
  ScratchRegisterScope scratch_scope(GetAssembler());
  XRegister card_table = scratch_scope.AllocateXRegister();
  XRegister scratch = scratch_scope.AllocateXRegister();
  // Fetch the card table address from the current thread.
  __ Loadd(card_table, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value());
  // The card for `object` lives at `card_table + (object >> kCardShift)`.
  __ Srli(scratch, object, gc::accounting::CardTable::kCardShift);
  __ Add(scratch, card_table, scratch);
  // Enforce: assert (!clean || !self->is_gc_marking)
  // A non-zero card byte means "not clean", which passes immediately.
  __ Lb(scratch, scratch, 0);
  static_assert(gc::accounting::CardTable::kCardClean == 0);
  __ Bnez(scratch, &card_ok);
  // The card is clean; that is acceptable only while the GC is not marking.
  __ Loadw(scratch, TR, Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value());
  __ Beqz(scratch, &card_ok);
  // Clean card during marking: the check failed.
  __ Unimp();
  __ Bind(&card_ok);
}
void LocationsBuilderRISCV64::HandleFieldSet(HInstruction* instruction) {
  // The location summary for a field store is created with `kNoCall`.
  auto* allocator = GetGraph()->GetAllocator();
  LocationSummary* summary =
      new (allocator) LocationSummary(instruction, LocationSummary::kNoCall);
  // Input 0 must be in a core register.
  summary->SetInAt(0, Location::RequiresRegister());
  // Input 1 gets whatever location `ValueLocationForStore()` picks for it.
  HInstruction* stored_value = instruction->InputAt(1);
  summary->SetInAt(1, ValueLocationForStore(stored_value));
}
// Emits the store for a field set, then the write-barrier related code selected by
// `write_barrier_kind`.
// Input 0 is the object register; input 1 is the value, either a register or a constant.
void InstructionCodeGeneratorRISCV64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null,
WriteBarrierKind write_barrier_kind) {
DataType::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
XRegister obj = locations->InAt(0).AsRegister<XRegister>();
Location value = locations->InAt(1);
// A constant value is debug-checked to be a zero bit pattern.
DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant()));
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
// Volatile fields go through `StoreSeqCst()`, all others through the plain `Store()`.
if (is_volatile) {
StoreSeqCst(value, obj, offset, type, instruction);
} else {
Store(value, obj, offset, type);
bool needs_write_barrier =
codegen_->StoreNeedsWriteBarrier(type, instruction->InputAt(1), write_barrier_kind);
if (needs_write_barrier) {
// NOTE(review): the card-marking calls of both branches below are not fully visible in
// this view; only the debug check and the last argument of a call remain — confirm.
if (value.IsConstant()) {
DCHECK_EQ(write_barrier_kind, WriteBarrierKind::kEmitBeingReliedOn);
} else {
value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
// NOTE(review): the body of this branch is not visible here; presumably it validates the
// GC card for `obj` — confirm.
} else if (codegen_->ShouldCheckGCCard(type, instruction->InputAt(1), write_barrier_kind)) {
// Sets up the input/output locations for an instance or static field get.
// A reference-typed get with read barriers enabled is treated specially: it may call on a
// slow path and its output overlaps the input.
void LocationsBuilderRISCV64::HandleFieldGet(HInstruction* instruction) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
(instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
// NOTE(review): the first lines of the constructor arguments (including the condition of
// the conditional expression) are not visible in this view. Presumably the call kind is
// chosen by `object_field_get_with_read_barrier` — confirm.
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
// Input for object receiver.
locations->SetInAt(0, Location::RequiresRegister());
// NOTE(review): the output-location statements of both branches below are only partly
// visible in this view.
if (DataType::IsFloatingPointType(instruction->GetType())) {
} else {
// The output overlaps for an object field get when read barriers
// are enabled: we do not want the load to overwrite the object's
// location, as we need it to emit the read barrier.
object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier.
// Emits the load for an instance or static field get. Reference loads with Baker read
// barriers take a dedicated path; every other load goes through `Load()`, and reference
// loads with non-Baker read barriers are followed by `MaybeGenerateReadBarrierSlow()`.
void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
// The declared field type and the instruction's type must have the same size.
DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
DataType::Type type = instruction->GetType();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
XRegister obj = obj_loc.AsRegister<XRegister>();
Location dst_loc = locations->Out();
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
// NOTE(review): the bodies of both `is_volatile` checks in this function are not visible in
// this view; presumably they emit the memory barriers for a volatile load — confirm.
if (is_volatile) {
if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Object> */ dst = *(obj + offset)
Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier call.
// NOTE(review): only the last argument of that call is visible here.
/* needs_null_check= */ true);
} else {
Load(dst_loc, obj, offset, type);
if (is_volatile) {
if (type == DataType::Type::kReference && !codegen_->EmitBakerReadBarrier()) {
// If read barriers are enabled, emit read barriers other than
// Baker's using a slow path (and also unpoison the loaded
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
void InstructionCodeGeneratorRISCV64::GenerateMethodEntryExitHook(HInstruction* instruction) {
SlowPathCodeRISCV64* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathRISCV64(instruction);
ScratchRegisterScope temps(GetAssembler());
XRegister tmp = temps.AllocateXRegister();
if (instruction->IsMethodExitHook()) {
// Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
// would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check
// if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
// disabled in debuggable runtime. The other bit is used when this method itself requires a
// deoptimization due to redefinition. So it is safe to just check for non-zero value here.
__ Loadwu(tmp, SP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
__ Bnez(tmp, slow_path->GetEntryLabel());
uint64_t hook_offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset().SizeValue() :
auto [base_hook_address, hook_imm12] = SplitJitAddress(
reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()) + hook_offset);
__ LoadConst64(tmp, base_hook_address);
__ Lbu(tmp, tmp, hook_imm12);
// Check if there are any method entry / exit listeners. If no, continue.
__ Beqz(tmp, slow_path->GetExitLabel());
// Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
// If yes, just take the slow path.
static_assert(instrumentation::Instrumentation::kFastTraceListeners == 1u);
__ Addi(tmp, tmp, -1);
__ Bnez(tmp, slow_path->GetEntryLabel());
// Allocate second core scratch register. We can no longer use `Stored()`
// and similar macro instructions because there is no core scratch register left.
XRegister tmp2 = temps.AllocateXRegister();
// Check if there is place in the buffer to store a new entry, if no, take the slow path.
int32_t trace_buffer_curr_entry_offset =
__ Loadd(tmp, TR, trace_buffer_curr_entry_offset);
__ Loadd(tmp2, TR, Thread::TraceBufferPtrOffset<kRiscv64PointerSize>().SizeValue());
__ Addi(tmp, tmp, -dchecked_integral_cast<int32_t>(kNumEntriesForWallClock * sizeof(void*)));
__ Blt(tmp, tmp2, slow_path->GetEntryLabel());
// Update the index in the `Thread`.
__ Sd(tmp, TR, trace_buffer_curr_entry_offset);
// Record method pointer and trace action.
__ Ld(tmp2, SP, 0);
// Use last two bits to encode trace method action. For MethodEntry it is 0
// so no need to set the bits since they are 0 already.
DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);