blob: 8204f1eecbbed5982a022cfb1aa36390f02410e7 [file] [log] [blame]
/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "code_generator_arm_vixl.h"
#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
namespace art {
namespace arm {
namespace vixl32 = vixl::aarch32;
using namespace vixl32; // NOLINT(build/namespaces)
using helpers::DRegisterFrom;
using helpers::DWARFReg;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegister;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegister;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OperandFrom;
using helpers::OutputRegister;
using helpers::OutputSRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::Uint64ConstantFrom;
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using RegisterList = vixl32::RegisterList;
// Checks the layout expected for both core and FP register pairs: the pair
// must start at an even register and occupy two consecutive registers.
static bool ExpectedPairLayout(Location location) {
  const auto low = location.low();
  return ((low & 1) == 0) && (low + 1 == location.high());
}
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
// NOTE(review): presumably the case-count threshold at which a packed switch is
// lowered to a table branch instead of a compare/jump chain — confirm against
// the HPackedSwitch visitor.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle
// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
// Guard: the `__` shorthand used throughout this file must not already be defined.
#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
// `__` forwards to the VIXL assembler of the `codegen` variable in scope at the use site.
#define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT

// Int32 offset of the quick entrypoint `x` for the ARM pointer size.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()

// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
// Whether a 16-bit (narrow) LDR encoding can be used: both registers must be
// low registers and the immediate offset must be small enough.
static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
  if (!rt.IsLow() || !rn.IsLow()) {
    return false;
  }
  return offset < 32u;
}
// RAII helper around an ADR instruction: the constructor emits a wide ADR in an
// ExactAssemblyScope; the destructor, once the target label is bound, patches the
// emitted encoding in place to set the Thumb mode bit of the computed address.
class EmitAdrCode {
 public:
  EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
      : assembler_(assembler), rd_(rd), label_(label) {
    DCHECK(!assembler->AllowMacroInstructions());  // In ExactAssemblyScope.
    adr_location_ = assembler->GetCursorOffset();
    assembler->adr(EncodingSize(Wide), rd, label);
  }

  ~EmitAdrCode() {
    DCHECK(label_->IsBound());
    // The ADR emitted by the assembler does not set the Thumb mode bit we need.
    // TODO: Maybe extend VIXL to allow ADR for return address?
    uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
    // Expecting ADR encoding T3 with `(offset & 1) == 0`.
    DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u);          // Check bits 24-31, except 26.
    DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu);          // Check bits 16-23.
    DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode());  // Check bits 8-11 and 15.
    DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u);          // Check bit 0, i.e. the `offset & 1`.
    // Add the Thumb mode bit.
    raw_adr[2] |= 0x01u;
  }

 private:
  ArmVIXLMacroAssembler* const assembler_;
  vixl32::Register rd_;
  vixl32::Label* const label_;
  // Buffer offset of the emitted ADR, used to locate and patch it at destruction.
  int32_t adr_location_;
};
// Builds the caller-save set for a SaveEverything slow path that takes one
// register argument and produces a reference result: only the first calling
// convention register needs to be preserved by the caller.
static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConventionARMVIXL calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers;
// for each live D register they treat the two corresponding S registers as live.
//
// The two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
// from a list of contiguous S registers a list of contiguous D registers (handling the first/last
// S register corner cases) and save/restore this new list treating its entries as D registers.
// This is beneficial for:
// - decreasing code size
// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
//   restored and then used in regular non SlowPath code as a D register.
//
// For the following example (v means the S register is live):
//   D names: |    D0   |    D1   |    D2   |    D3   | ...
//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
//   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
//
// S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be processed
// as D registers.
//
// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
// for lists of floating-point registers.
// Saves the contiguous S register range [first, last] to the stack starting at
// `stack_offset`, coalescing aligned pairs into D-register stores (VSTR.F64 /
// VSTM) and storing unpaired boundary S registers individually.
// Returns the stack offset just past the saved registers.
static size_t SaveContiguousSRegisterList(size_t first,
                                          size_t last,
                                          CodeGenerator* codegen,
                                          size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    // Single register S0: store it directly, no pairing possible.
    __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    // `first` is the high half of a D register: store it alone so the remaining
    // range starts on a D register boundary.
    __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool save_last = false;
  if (last % 2 == 0) {
    // `last` is the low half of a D register: defer it, store the rest as D regs.
    save_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;

    if (number_of_d_regs == 1) {
      __ Vstr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      // VSTM takes a base register; materialize sp + stack_offset in a temp if needed.
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (save_last) {
    // Store the deferred trailing S register (`last` was decremented above).
    __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}
// Restores the contiguous S register range [first, last] from the stack starting
// at `stack_offset`; mirror of SaveContiguousSRegisterList (same pairing logic,
// loads instead of stores). Returns the stack offset just past the restored
// registers.
static size_t RestoreContiguousSRegisterList(size_t first,
                                             size_t last,
                                             CodeGenerator* codegen,
                                             size_t stack_offset) {
  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
  DCHECK_LE(first, last);
  if ((first == last) && (first == 0)) {
    // Single register S0: load it directly, no pairing possible.
    __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
    return stack_offset + kSRegSizeInBytes;
  }
  if (first % 2 == 1) {
    // `first` is the high half of a D register: load it alone so the remaining
    // range starts on a D register boundary.
    __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  bool restore_last = false;
  if (last % 2 == 0) {
    // `last` is the low half of a D register: defer it, load the rest as D regs.
    restore_last = true;
    --last;
  }

  if (first < last) {
    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
    DCHECK_EQ((last - first + 1) % 2, 0u);
    size_t number_of_d_regs = (last - first + 1) / 2;
    if (number_of_d_regs == 1) {
      __ Vldr(d_reg, MemOperand(sp, stack_offset));
    } else if (number_of_d_regs > 1) {
      // VLDM takes a base register; materialize sp + stack_offset in a temp if needed.
      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        __ Add(base, sp, Operand::From(stack_offset));
      }
      __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
    }
    stack_offset += number_of_d_regs * kDRegSizeInBytes;
  }

  if (restore_last) {
    // Load the deferred trailing S register (`last` was decremented above).
    __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
    stack_offset += kSRegSizeInBytes;
  }

  return stack_offset;
}
// Maps a data type to the width/signedness of the load used to read it.
static LoadOperandType GetLoadOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      return kLoadUnsignedByte;
    case DataType::Type::kInt8:
      return kLoadSignedByte;
    case DataType::Type::kUint16:
      return kLoadUnsignedHalfword;
    case DataType::Type::kInt16:
      return kLoadSignedHalfword;
    case DataType::Type::kReference:
    case DataType::Type::kInt32:
      // References and 32-bit integers are both plain word loads.
      return kLoadWord;
    case DataType::Type::kInt64:
      return kLoadWordPair;
    case DataType::Type::kFloat32:
      return kLoadSWord;
    case DataType::Type::kFloat64:
      return kLoadDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}
// Maps a data type to the width of the store used to write it. Stores do not
// distinguish signedness, so signed/unsigned variants share an operand type.
static StoreOperandType GetStoreOperandType(DataType::Type type) {
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      return kStoreByte;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      return kStoreHalfword;
    case DataType::Type::kReference:
    case DataType::Type::kInt32:
      // References and 32-bit integers are both plain word stores.
      return kStoreWord;
    case DataType::Type::kInt64:
      return kStoreWordPair;
    case DataType::Type::kFloat32:
      return kStoreSWord;
    case DataType::Type::kFloat64:
      return kStoreDWord;
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}
// Spills the live core and FP registers of `locations` into the slow-path
// register slots of the current frame, recording each register's stack offset
// so RestoreLiveRegisters / stack maps can find it.
void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  // First pass: record offsets (and stack-mask bits for object references);
  // the actual stores are emitted in bulk below.
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  orig_offset = stack_offset;
  // Record the FP register offsets before emitting the (coalesced) stores.
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += kArmWordSize;
  }

  stack_offset = orig_offset;
  // Save each maximal run of contiguous live S registers in one go, pairing
  // them into D registers where possible (see SaveContiguousSRegisterList).
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}
// Reloads the registers spilled by SaveLiveRegisters from the slow-path
// register slots, using the same ordering/coalescing scheme.
void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  size_t orig_offset = stack_offset;

  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  // Walk the core spills only to advance `stack_offset` past them (and to
  // check invariants); the loads themselves are emitted in bulk below.
  for (uint32_t i : LowToHighBits(core_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    stack_offset += kArmWordSize;
  }

  // TODO(VIXL): Check the coherency of stack_offset after this with a test.
  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
  arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);

  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  // Restore each maximal run of contiguous live S registers, pairing them into
  // D registers where possible (see RestoreContiguousSRegisterList).
  while (fp_spills != 0u) {
    uint32_t begin = CTZ(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);
    fp_spills &= tmp;  // Clear the contiguous range of 1s.
    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
    stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
  }
  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
}
class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
instruction_,
instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
bool IsFatal() const override { return true; }
const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
};
class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
: SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
bool IsFatal() const override { return true; }
const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
};
class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCodeARMVIXL(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
__ B(arm_codegen->GetLabelOf(successor_));
}
}
vixl32::Label* GetReturnLabel() {
DCHECK(successor_ == nullptr);
return &return_label_;
}
HBasicBlock* GetSuccessor() const {
return successor_;
}
const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
private:
// If not null, the block to branch to after the suspend check.
HBasicBlock* const successor_;
// If `successor_` is null, the label to branch to after the suspend check.
vixl32::Label return_label_;
DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
};
class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
: SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
InvokeRuntimeCallingConventionARMVIXL calling_convention;
codegen->EmitParallelMoves(
locations->InAt(0),
LocationFrom(calling_convention.GetRegisterAt(0)),
DataType::Type::kInt32,
locations->InAt(1),
LocationFrom(calling_convention.GetRegisterAt(1)),
DataType::Type::kInt32);
QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
? kQuickThrowStringBounds
: kQuickThrowArrayBounds;
arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
bool IsFatal() const override { return true; }
const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
};
// Slow path resolving a class and/or running its static initializer via the
// runtime, used by HLoadClass and HClinitCheck.
class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
      : SlowPathCodeARMVIXL(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
      arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      // The class is already resolved; move it into the first argument register
      // for the clinit call (from the output for HLoadClass, from the input
      // for HClinitCheck).
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class (returned by the runtime in r0) to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};
class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
: SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
DCHECK(instruction_->IsLoadString());
DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
__ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
arm_codegen->Move32(locations->Out(), LocationFrom(r0));
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
};
// Slow path for HInstanceOf (calls the non-trivial instanceof entrypoint and
// materializes the boolean result) and HCheckCast (calls the checking
// entrypoint, which throws on failure).
class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    __ Bind(GetEntryLabel());

    // A fatal path that cannot be caught does not need its registers saved.
    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      // The result of the instanceof test is returned in r0.
      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    // A non-fatal path resumes execution after the type check.
    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  // Whether a failed check terminates the method (no return to compiled code).
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
};
class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
: SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
__ Mov(calling_convention.GetRegisterAt(0),
static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
};
class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
parallel_move.AddMove(
locations->InAt(0),
LocationFrom(calling_convention.GetRegisterAt(0)),
DataType::Type::kReference,
nullptr);
parallel_move.AddMove(
locations->InAt(1),
LocationFrom(calling_convention.GetRegisterAt(1)),
DataType::Type::kInt32,
nullptr);
parallel_move.AddMove(
locations->InAt(2),
LocationFrom(calling_convention.GetRegisterAt(2)),
DataType::Type::kReference,
nullptr);
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};
// Slow path generating a read barrier for a heap reference.
// Calls the kQuickReadBarrierSlow entrypoint with (ref, obj, offset-or-index)
// and writes the possibly-updated reference into `out_`.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
                                             Location out,
                                             Location ref,
                                             Location obj,
                                             uint32_t offset,
                                             Location index)
      : SlowPathCodeARMVIXL(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ LoadFromOffset(kLoadWord, out, out, offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        vixl32::Register index_reg = RegisterFrom(index_);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
        if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
          // art::arm::ArmVIXLMacroAssembler::Add below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg, index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, TIMES_4);
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, offset_);
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      // Dynamic (register) offset: move it as part of the parallel move.
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      // Constant offset: materialize it after the parallel move.
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ Mov(calling_convention.GetRegisterAt(2), offset_);
    }
    arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    // The runtime returns the (possibly updated) reference in r0.
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
  }

 private:
  // Returns a caller-save core register different from `ref_` and `obj_`,
  // suitable for scratch use within this slow path.
  vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    uint32_t ref = RegisterFrom(ref_).GetCode();
    uint32_t obj = RegisterFrom(obj_).GetCode();
    for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return vixl32::Register(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
};
// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  // `out` receives the processed root reference; `root` is the GC root
  // location read by the instruction (an HLoadClass or HLoadString).
  ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
      : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register reg_out = RegisterFrom(out_);
    DCHECK(locations->CanCall());
    // The output register must not be among the saved live registers below.
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Pass the root in the first calling-convention register and call the
    // runtime's read-barrier-for-root entrypoint; the result comes back in r0.
    InvokeRuntimeCallingConventionARMVIXL calling_convention;
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
    arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm_codegen->Move32(out_, LocationFrom(r0));

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
};
// Translates a HIR comparison condition into the corresponding AArch32
// condition code. Signed variants map to signed codes; the kCondB/BE/A/AE
// (below/above) variants map to the unsigned lo/ls/hi/hs codes.
inline vixl32::Condition ARMCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ:
      return eq;
    case kCondNE:
      return ne;
    case kCondLT:
      return lt;
    case kCondLE:
      return le;
    case kCondGT:
      return gt;
    case kCondGE:
      return ge;
    case kCondB:
      return lo;
    case kCondBE:
      return ls;
    case kCondA:
      return hi;
    case kCondAE:
      return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}
// Maps a signed HIR condition to the equivalent unsigned AArch32 condition
// code; conditions that are already unsigned are returned unchanged.
inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ:
      return eq;
    case kCondNE:
      return ne;
    // Signed conditions become their unsigned counterparts.
    case kCondLT:
      return lo;
    case kCondLE:
      return ls;
    case kCondGT:
      return hi;
    case kCondGE:
      return hs;
    // Unsigned conditions remain unchanged.
    case kCondB:
      return lo;
    case kCondBE:
      return ls;
    case kCondA:
      return hi;
    case kCondAE:
      return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}
// Translates a HIR floating-point comparison into an AArch32 condition code.
// The ARM condition codes can express all the necessary branches, see the
// "Meaning (floating-point)" column in table A8-1 of the ARMv7 reference
// manual. There is no dex instruction or HIR that would need the missing
// conditions "equal or unordered" or "not equal".
inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
  switch (cond) {
    case kCondEQ:
      return eq;
    case kCondNE:
      return ne;  // Also taken for unordered inputs.
    case kCondLT:
      return gt_bias ? cc : lt;  // `lt` is also taken for unordered inputs.
    case kCondLE:
      return gt_bias ? ls : le;  // `le` is also taken for unordered inputs.
    case kCondGT:
      return gt_bias ? hi : gt;  // `hi` is also taken for unordered inputs.
    case kCondGE:
      return gt_bias ? cs : ge;  // `cs` is also taken for unordered inputs.
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}
// Converts an HDataProcWithShifterOp shift kind into the assembler's
// ShiftType. Only shift op kinds are valid here; anything else aborts.
inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
  switch (op_kind) {
    case HDataProcWithShifterOp::kASR:
      return ShiftType::ASR;
    case HDataProcWithShifterOp::kLSL:
      return ShiftType::LSL;
    case HDataProcWithShifterOp::kLSR:
      return ShiftType::LSR;
    default:
      LOG(FATAL) << "Unexpected op kind " << op_kind;
      UNREACHABLE();
  }
}
void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << vixl32::Register(reg);
}
void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
stream << vixl32::SRegister(reg);
}
// Returns the ARM-specific view of the compiler options' ISA features.
const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
  const auto* features = GetCompilerOptions().GetInstructionSetFeatures();
  return *features->AsArmInstructionSetFeatures();
}
// Computes a bit mask with one bit set per S register in `regs`
// (bit i corresponds to s<i>).
static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
  uint32_t mask = 0u;
  for (uint32_t i = regs.GetFirstSRegister().GetCode();
       i <= regs.GetLastSRegister().GetCode();
       ++i) {
    // Use an unsigned literal: `1 << 31` on a signed int is undefined
    // behavior, and the register list can include s31 (e.g. the FP
    // callee-save set s16-s31 used in the constructor below).
    mask |= (1u << i);
  }
  return mask;
}
// Saves the register in the stack. Returns the size taken on stack.
// Not implemented for the VIXL32 backend; aborts at runtime if reached.
size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                              uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}
// Restores the register from the stack. Returns the size taken on stack.
// Not implemented for the VIXL32 backend; aborts at runtime if reached.
size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                 uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}
// Saves an FP register to the stack. Not implemented for the VIXL32 backend;
// aborts at runtime if reached.
size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                       uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}
// Restores an FP register from the stack. Not implemented for the VIXL32
// backend; aborts at runtime if reached.
size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                          uint32_t reg_id ATTRIBUTE_UNUSED) {
  TODO_VIXL32(FATAL);
  UNREACHABLE();
}
// Emits one 32-bit data-processing instruction: out = first <kind> second.
// A zero immediate second operand is folded into a plain move: AND with 0
// always yields 0, the other supported ops with 0 just copy `first`.
static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
                                        vixl32::Register out,
                                        vixl32::Register first,
                                        const Operand& second,
                                        CodeGeneratorARMVIXL* codegen) {
  if (second.IsImmediate() && second.GetImmediate() == 0) {
    if (kind == HInstruction::kAnd) {
      __ Mov(out, Operand(0));
    } else {
      __ Mov(out, Operand(first));
    }
    return;
  }
  switch (kind) {
    case HInstruction::kAdd:
      __ Add(out, first, second);
      break;
    case HInstruction::kAnd:
      __ And(out, first, second);
      break;
    case HInstruction::kOr:
      __ Orr(out, first, second);
      break;
    case HInstruction::kSub:
      __ Sub(out, first, second);
      break;
    case HInstruction::kXor:
      __ Eor(out, first, second);
      break;
    default:
      LOG(FATAL) << "Unexpected instruction kind: " << kind;
      UNREACHABLE();
  }
}
// Emits a 64-bit data-processing operation on a register pair.
// ADD/SUB propagate carry/borrow from the low to the high word; the bitwise
// operations act on the two halves independently.
static void GenerateDataProc(HInstruction::InstructionKind kind,
                             const Location& out,
                             const Location& first,
                             const Operand& second_lo,
                             const Operand& second_hi,
                             CodeGeneratorARMVIXL* codegen) {
  const vixl32::Register first_lo = LowRegisterFrom(first);
  const vixl32::Register first_hi = HighRegisterFrom(first);
  const vixl32::Register out_lo = LowRegisterFrom(out);
  const vixl32::Register out_hi = HighRegisterFrom(out);
  switch (kind) {
    case HInstruction::kAdd:
      __ Adds(out_lo, first_lo, second_lo);
      __ Adc(out_hi, first_hi, second_hi);
      break;
    case HInstruction::kSub:
      __ Subs(out_lo, first_lo, second_lo);
      __ Sbc(out_hi, first_hi, second_hi);
      break;
    default:
      GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
      GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
      break;
  }
}
// Builds a shifter operand; a zero shift amount degenerates to the plain
// register (some shift encodings do not accept an immediate of 0).
static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
  if (shift_imm == 0u) {
    return Operand(rm);
  }
  return Operand(rm, shift, shift_imm);
}
// Emits a 64-bit data-processing operation whose second operand is a 64-bit
// value shifted by a constant amount (shift in {LSL, LSR, ASR}). The 64-bit
// shift is decomposed into 32-bit register operations on the two halves.
static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
                                 CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));

  const LocationSummary* const locations = instruction->GetLocations();
  const uint32_t shift_value = instruction->GetShiftAmount();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
  const Location first = locations->InAt(0);
  const Location second = locations->InAt(1);
  const Location out = locations->Out();
  const vixl32::Register first_hi = HighRegisterFrom(first);
  const vixl32::Register first_lo = LowRegisterFrom(first);
  const vixl32::Register out_hi = HighRegisterFrom(out);
  const vixl32::Register out_lo = LowRegisterFrom(out);
  const vixl32::Register second_hi = HighRegisterFrom(second);
  const vixl32::Register second_lo = LowRegisterFrom(second);
  const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());

  if (shift_value >= 32) {
    if (shift == ShiftType::LSL) {
      // second << n with n >= 32: low word is 0, high word is
      // second_lo << (n - 32).
      GenerateDataProcInstruction(kind,
                                  out_hi,
                                  first_hi,
                                  Operand(second_lo, ShiftType::LSL, shift_value - 32),
                                  codegen);
      GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
    } else if (shift == ShiftType::ASR) {
      // second >> n (arithmetic) with n >= 32: low word is
      // second_hi >> (n - 32), high word is the sign extension of second_hi.
      GenerateDataProc(kind,
                       out,
                       first,
                       GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
                       Operand(second_hi, ShiftType::ASR, 31),
                       codegen);
    } else {
      DCHECK_EQ(shift, ShiftType::LSR);
      // second >> n (logical) with n >= 32: low word is
      // second_hi >> (n - 32), high word is 0.
      GenerateDataProc(kind,
                       out,
                       first,
                       GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
                       0,
                       codegen);
    }
  } else {
    DCHECK_GT(shift_value, 1U);
    DCHECK_LT(shift_value, 32U);

    UseScratchRegisterScope temps(codegen->GetVIXLAssembler());

    if (shift == ShiftType::LSL) {
      // We are not doing this for HInstruction::kAdd because the output will require
      // Location::kOutputOverlap; not applicable to other cases.
      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
        // OR/XOR can fold the spilled-over bits into out_hi in two steps
        // without a scratch register.
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    first_hi,
                                    Operand(second_hi, ShiftType::LSL, shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    out_hi,
                                    Operand(second_lo, ShiftType::LSR, 32 - shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    first_lo,
                                    Operand(second_lo, ShiftType::LSL, shift_value),
                                    codegen);
      } else {
        // Materialize the shifted high word in a scratch register first.
        const vixl32::Register temp = temps.Acquire();

        __ Lsl(temp, second_hi, shift_value);
        __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
        GenerateDataProc(kind,
                         out,
                         first,
                         Operand(second_lo, ShiftType::LSL, shift_value),
                         temp,
                         codegen);
      }
    } else {
      DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);

      // We are not doing this for HInstruction::kAdd because the output will require
      // Location::kOutputOverlap; not applicable to other cases.
      if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
        // OR/XOR can fold the bits shifted down from the high word into
        // out_lo in two steps without a scratch register.
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    first_lo,
                                    Operand(second_lo, ShiftType::LSR, shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_lo,
                                    out_lo,
                                    Operand(second_hi, ShiftType::LSL, 32 - shift_value),
                                    codegen);
        GenerateDataProcInstruction(kind,
                                    out_hi,
                                    first_hi,
                                    Operand(second_hi, shift, shift_value),
                                    codegen);
      } else {
        // Materialize the shifted low word in a scratch register first.
        const vixl32::Register temp = temps.Acquire();

        __ Lsr(temp, second_lo, shift_value);
        __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
        GenerateDataProc(kind,
                         out,
                         first,
                         temp,
                         Operand(second_hi, shift, shift_value),
                         codegen);
      }
    }
  }
}
// Emits a VCMP comparing the two inputs of `instruction`. A constant right
// operand must be an arithmetic zero, the only immediate VCMP can encode.
static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
  const Location rhs_loc = instruction->GetLocations()->InAt(1);

  if (!rhs_loc.IsConstant()) {
    __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
    return;
  }

  // 0.0 is the only immediate that can be encoded directly in a VCMP
  // instruction.
  //
  // Both the JLS (section 15.20.1) and the JVMS (section 6.5) specify that
  // in a floating-point comparison, positive zero and negative zero are
  // considered equal, so the literal 0.0 covers both cases here.
  //
  // Note however that some methods (Float.equal, Float.compare,
  // Float.compareTo, Double.equal, Double.compare, Double.compareTo,
  // Math.max, Math.min, StrictMath.max, StrictMath.min) consider 0.0 to be
  // (strictly) greater than -0.0. So if calls to these methods are ever
  // translated into an HCompare instruction, the -0.0 case must be handled
  // with care here.
  DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());

  const DataType::Type type = instruction->InputAt(0)->GetType();
  if (type == DataType::Type::kFloat32) {
    __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
  } else {
    DCHECK_EQ(type, DataType::Type::kFloat64);
    __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
  }
}
// Rewrites comparisons against +/-1 into equivalent comparisons against 0,
// which generate better code:
//   x <  1 (unsigned)  <=>  x == 0        x >= 1 (unsigned)  <=>  x != 0
//   x > -1             <=>  x >= 0        x <= -1            <=>  x <  0
// `condition` and `opposite` are updated in place; the (possibly adjusted)
// constant is returned.
static int64_t AdjustConstantForCondition(int64_t value,
                                          IfCondition* condition,
                                          IfCondition* opposite) {
  if (value == 1) {
    switch (*condition) {
      case kCondB:
        *condition = kCondEQ;
        *opposite = kCondNE;
        return 0;
      case kCondAE:
        *condition = kCondNE;
        *opposite = kCondEQ;
        return 0;
      default:
        break;
    }
  } else if (value == -1) {
    switch (*condition) {
      case kCondGT:
        *condition = kCondGE;
        *opposite = kCondLT;
        return 0;
      case kCondLE:
        *condition = kCondLT;
        *opposite = kCondGE;
        return 0;
      default:
        break;
    }
  }
  return value;
}
// Emits a comparison of a 64-bit register pair against a constant and returns
// the (condition, opposite-condition) codes to branch on afterwards. If
// `invert` is true, the condition and its opposite are swapped first.
static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
    HCondition* condition,
    bool invert,
    CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  const LocationSummary* const locations = condition->GetLocations();
  IfCondition cond = condition->GetCondition();
  IfCondition opposite = condition->GetOppositeCondition();

  if (invert) {
    std::swap(cond, opposite);
  }

  std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
  const Location left = locations->InAt(0);
  const Location right = locations->InAt(1);

  DCHECK(right.IsConstant());

  const vixl32::Register left_high = HighRegisterFrom(left);
  const vixl32::Register left_low = LowRegisterFrom(left);
  int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
  UseScratchRegisterScope temps(codegen->GetVIXLAssembler());

  // Comparisons against 0 are common enough to deserve special attention.
  if (value == 0) {
    switch (cond) {
      case kCondNE:
      // x > 0 iff x != 0 when the comparison is unsigned.
      case kCondA:
        ret = std::make_pair(ne, eq);
        FALLTHROUGH_INTENDED;
      case kCondEQ:
      // x <= 0 iff x == 0 when the comparison is unsigned.
      case kCondBE:
        // ORS of the halves sets Z iff the whole 64-bit value is zero.
        __ Orrs(temps.Acquire(), left_low, left_high);
        return ret;
      case kCondLT:
      case kCondGE:
        // For signed < 0 / >= 0 only the sign (the high half) matters.
        __ Cmp(left_high, 0);
        return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
      // Trivially true or false.
      case kCondB:
        ret = std::make_pair(ne, eq);
        FALLTHROUGH_INTENDED;
      case kCondAE:
        // Comparing a register with itself forces the flags into a known
        // state (Z set), making the materialized result constant.
        __ Cmp(left_low, left_low);
        return ret;
      default:
        break;
    }
  }

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Compare the high halves; only if they are equal, compare the low
      // halves inside an IT block so the final flags describe the 64-bit
      // unsigned comparison.
      const uint32_t value_low = Low32Bits(value);
      Operand operand_low(value_low);

      __ Cmp(left_high, High32Bits(value));

      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
      // we must ensure that the operands corresponding to the least significant
      // halves of the inputs fit into a 16-bit CMP encoding.
      if (!left_low.IsLow() || !IsUint<8>(value_low)) {
        operand_low = Operand(temps.Acquire());
        __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
      }

      // We use the scope because of the IT block that follows.
      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kExactSize);

      __ it(eq);
      __ cmp(eq, left_low, operand_low);
      ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
      break;
    }
    case kCondLE:
    case kCondGT:
      // Trivially true or false.
      if (value == std::numeric_limits<int64_t>::max()) {
        __ Cmp(left_low, left_low);
        ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
        break;
      }

      // Rewrite `x <= c` as `x < c + 1` (and `x > c` as `x >= c + 1`) so the
      // subtract-with-carry sequence below can be shared with LT/GE.
      if (cond == kCondLE) {
        DCHECK_EQ(opposite, kCondGT);
        cond = kCondLT;
        opposite = kCondGE;
      } else {
        DCHECK_EQ(cond, kCondGT);
        DCHECK_EQ(opposite, kCondLE);
        cond = kCondGE;
        opposite = kCondLT;
      }

      value++;
      FALLTHROUGH_INTENDED;
    case kCondGE:
    case kCondLT: {
      // 64-bit signed comparison: subtract with borrow propagation; the
      // discarded high result leaves the flags describing left - value.
      __ Cmp(left_low, Low32Bits(value));
      __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
      ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }

  return ret;
}
// Emits a comparison of two 64-bit register pairs and returns the
// (condition, opposite-condition) codes to branch on afterwards. If `invert`
// is true, the condition and its opposite are swapped first.
static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
    HCondition* condition,
    bool invert,
    CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  const LocationSummary* const locations = condition->GetLocations();
  IfCondition cond = condition->GetCondition();
  IfCondition opposite = condition->GetOppositeCondition();

  if (invert) {
    std::swap(cond, opposite);
  }

  std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
  Location left = locations->InAt(0);
  Location right = locations->InAt(1);

  DCHECK(right.IsRegisterPair());

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Compare the high halves; only if they are equal, compare the low
      // halves inside an IT block so the final flags describe the 64-bit
      // unsigned comparison.
      __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));

      // We use the scope because of the IT block that follows.
      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kExactSize);

      __ it(eq);
      __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
      ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
      break;
    }
    case kCondLE:
    case kCondGT:
      // Rewrite `a <= b` as `b >= a` (and `a > b` as `b < a`) by swapping the
      // operands, so the subtract-with-carry sequence below can be shared.
      if (cond == kCondLE) {
        DCHECK_EQ(opposite, kCondGT);
        cond = kCondGE;
        opposite = kCondLT;
      } else {
        DCHECK_EQ(cond, kCondGT);
        DCHECK_EQ(opposite, kCondLE);
        cond = kCondLT;
        opposite = kCondGE;
      }

      std::swap(left, right);
      FALLTHROUGH_INTENDED;
    case kCondGE:
    case kCondLT: {
      // 64-bit signed comparison via subtract with borrow propagation.
      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());

      __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
      __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
      ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }

  return ret;
}
// Emits a test of `condition` (integral, reference, long or floating-point)
// and returns the (condition, opposite-condition) codes to branch on. If
// `invert` is true the returned pair is for the negated condition.
static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
                                                                    bool invert,
                                                                    CodeGeneratorARMVIXL* codegen) {
  const DataType::Type type = condition->GetLeft()->GetType();
  IfCondition cond = condition->GetCondition();
  IfCondition opposite = condition->GetOppositeCondition();
  std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);

  if (invert) {
    std::swap(cond, opposite);
  }

  if (type == DataType::Type::kInt64) {
    ret = condition->GetLocations()->InAt(1).IsConstant()
        ? GenerateLongTestConstant(condition, invert, codegen)
        : GenerateLongTest(condition, invert, codegen);
  } else if (DataType::IsFloatingPointType(type)) {
    GenerateVcmp(condition, codegen);
    // Transfer the FP status flags to the APSR so the usual condition codes
    // can be used on the VCMP result.
    __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
    ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
                         ARMFPCondition(opposite, condition->IsGtBias()));
  } else {
    DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
    __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
    ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
  }

  return ret;
}
// Materializes the boolean value of `cond` into its output register: emits
// the test, zeroes the output, then sets it to 1 under the test's condition,
// using a 16-bit IT block when the output is a low register and a branch
// over a MOV otherwise.
static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
  const vixl32::Register out = OutputRegister(cond);
  const auto condition = GenerateTest(cond, false, codegen);

  // Zero the result without clobbering the flags the test just produced.
  __ Mov(LeaveFlags, out, 0);

  if (out.IsLow()) {
    // We use the scope because of the IT block that follows.
    ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                             2 * vixl32::k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kExactSize);

    __ it(condition.first);
    __ mov(condition.first, out, 1);
  } else {
    // High registers have no 16-bit conditional MOV; branch around the
    // assignment of 1 on the opposite condition instead.
    vixl32::Label done_label;
    vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);

    __ B(condition.second, final_label, /* is_far_target= */ false);
    __ Mov(out, 1);

    if (done_label.IsReferenced()) {
      __ Bind(&done_label);
    }
  }
}
// Materializes a 64-bit equality/inequality comparison: subtracts the two
// halves pairwise, ORs the differences (the OR is zero iff the operands are
// equal) and converts the result to 0/1.
static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  const LocationSummary* const locations = cond->GetLocations();
  IfCondition condition = cond->GetCondition();
  const vixl32::Register out = OutputRegister(cond);
  const Location left = locations->InAt(0);
  const Location right = locations->InAt(1);
  vixl32::Register left_high = HighRegisterFrom(left);
  vixl32::Register left_low = LowRegisterFrom(left);
  vixl32::Register temp;
  UseScratchRegisterScope temps(codegen->GetVIXLAssembler());

  if (right.IsConstant()) {
    IfCondition opposite = cond->GetOppositeCondition();
    const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
                                                     &condition,
                                                     &opposite);
    Operand right_high = High32Bits(value);
    Operand right_low = Low32Bits(value);

    // The output uses Location::kNoOutputOverlap.
    if (out.Is(left_high)) {
      // `out` would be written before `left_high` is read; swap the halves
      // (which half is subtracted first does not matter for EQ/NE).
      std::swap(left_low, left_high);
      std::swap(right_low, right_high);
    }

    __ Sub(out, left_low, right_low);
    temp = temps.Acquire();
    __ Sub(temp, left_high, right_high);
  } else {
    DCHECK(right.IsRegisterPair());
    temp = temps.Acquire();
    __ Sub(temp, left_high, HighRegisterFrom(right));
    __ Sub(out, left_low, LowRegisterFrom(right));
  }

  // Need to check after calling AdjustConstantForCondition().
  DCHECK(condition == kCondEQ || condition == kCondNE) << condition;

  if (condition == kCondNE && out.IsLow()) {
    // ORS sets Z iff both differences are zero; a 16-bit IT block then
    // writes 1 on NE.
    __ Orrs(out, out, temp);

    // We use the scope because of the IT block that follows.
    ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                             2 * vixl32::k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kExactSize);

    __ it(ne);
    __ mov(ne, out, 1);
  } else {
    __ Orr(out, out, temp);
    codegen->GenerateConditionWithZero(condition, out, out, temp);
  }
}
// Materializes a 64-bit comparison into the output register, using shortcut
// sequences for comparisons against 0 and for equality, and falling back to
// the generic branch/IT-based sequence otherwise.
static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  const LocationSummary* const locations = cond->GetLocations();
  IfCondition condition = cond->GetCondition();
  const vixl32::Register out = OutputRegister(cond);
  const Location left = locations->InAt(0);
  const Location right = locations->InAt(1);

  if (right.IsConstant()) {
    IfCondition opposite = cond->GetOppositeCondition();

    // Comparisons against 0 are common enough to deserve special attention.
    if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
          if (out.IsLow()) {
            // We only care if both input registers are 0 or not.
            __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));

            // We use the scope because of the IT block that follows.
            ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                                     2 * vixl32::k16BitT32InstructionSizeInBytes,
                                     CodeBufferCheckScope::kExactSize);

            __ it(ne);
            __ mov(ne, out, 1);
            return;
          }

          FALLTHROUGH_INTENDED;
        case kCondEQ:
        case kCondBE:
          // We only care if both input registers are 0 or not.
          __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
          codegen->GenerateConditionWithZero(condition, out, out);
          return;
        case kCondLT:
        case kCondGE:
          // We only care about the sign bit.
          FALLTHROUGH_INTENDED;
        case kCondAE:
        case kCondB:
          // The helper derives the result from the high half alone
          // (AE/B against 0 are trivially true/false).
          codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
          return;
        case kCondLE:
        case kCondGT:
        default:
          break;
      }
    }
  }

  // If `out` is a low register, then the GenerateConditionGeneric()
  // function generates a shorter code sequence that is still branchless.
  if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
    GenerateEqualLong(cond, codegen);
    return;
  }

  GenerateConditionGeneric(cond, codegen);
}
// Materializes an integral or reference comparison into the output register,
// dispatching to the 64-bit path for longs and using zero-comparison and
// subtraction shortcuts for 32-bit equality/inequality.
static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
                                                    CodeGeneratorARMVIXL* codegen) {
  const DataType::Type type = cond->GetLeft()->GetType();

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  if (type == DataType::Type::kInt64) {
    GenerateConditionLong(cond, codegen);
    return;
  }

  IfCondition condition = cond->GetCondition();
  vixl32::Register in = InputRegisterAt(cond, 0);
  const vixl32::Register out = OutputRegister(cond);
  const Location right = cond->GetLocations()->InAt(1);
  int64_t value;

  if (right.IsConstant()) {
    IfCondition opposite = cond->GetOppositeCondition();

    value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);

    // Comparisons against 0 are common enough to deserve special attention.
    if (value == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
          if (out.IsLow() && out.Is(in)) {
            __ Cmp(out, 0);

            // We use the scope because of the IT block that follows.
            ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                                     2 * vixl32::k16BitT32InstructionSizeInBytes,
                                     CodeBufferCheckScope::kExactSize);

            __ it(ne);
            __ mov(ne, out, 1);
            return;
          }

          FALLTHROUGH_INTENDED;
        case kCondEQ:
        case kCondBE:
        case kCondLT:
        case kCondGE:
        case kCondAE:
        case kCondB:
          codegen->GenerateConditionWithZero(condition, out, in);
          return;
        case kCondLE:
        case kCondGT:
        default:
          break;
      }
    }
  }

  if (condition == kCondEQ || condition == kCondNE) {
    Operand operand(0);

    if (right.IsConstant()) {
      operand = Operand::From(value);
    } else if (out.Is(RegisterFrom(right))) {
      // Avoid 32-bit instructions if possible.
      operand = InputOperandAt(cond, 0);
      in = RegisterFrom(right);
    } else {
      operand = InputOperandAt(cond, 1);
    }

    if (condition == kCondNE && out.IsLow()) {
      // SUBS clears Z iff the operands differ; an IT block then writes 1.
      __ Subs(out, in, operand);

      // We use the scope because of the IT block that follows.
      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kExactSize);

      __ it(ne);
      __ mov(ne, out, 1);
    } else {
      // out = in - operand, then normalize to 0/1 via the zero-test helper.
      __ Sub(out, in, operand);
      codegen->GenerateConditionWithZero(condition, out, out);
    }

    return;
  }

  GenerateConditionGeneric(cond, codegen);
}
// Returns true if `constant` fits in an unsigned 8-bit immediate (both
// 32-bit halves for a 64-bit constant), i.e. a 16-bit MOV/CMP encoding
// can be used for it.
static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
  const DataType::Type type = constant->GetType();

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  if (type == DataType::Type::kInt64) {
    const uint64_t value = Uint64ConstantFrom(constant);
    return IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
  }
  return IsUint<8>(Int32ConstantFrom(constant));
}
// Returns a constant location when `constant` fits an unsigned 8-bit
// immediate, and a register requirement otherwise.
static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
  DCHECK(!DataType::IsFloatingPointType(constant->GetType()));

  if (!constant->IsConstant()) {
    return Location::RequiresRegister();
  }
  HConstant* const hconstant = constant->AsConstant();
  return CanEncodeConstantAs8BitImmediate(hconstant)
      ? Location::ConstantLocation(hconstant)
      : Location::RequiresRegister();
}
// Returns true if `src` can be moved into `out` under a 16-bit IT block.
static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
  // Since IT blocks longer than a 16-bit instruction are deprecated by
  // ARMv8, reject floating-point outputs (there is no 16-bit VMOV encoding).
  if (!out.IsRegister() && !out.IsRegisterPair()) {
    return false;
  }

  if (!src.IsConstant()) {
    return true;
  }

  // For constants, additionally require the output in one or two low
  // registers and the constant to fit an unsigned 8-bit immediate, so a
  // 16-bit MOV encoding can be used.
  if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
    return false;
  }
  if (out.IsRegister()) {
    return RegisterFrom(out).IsLow();
  }
  DCHECK(out.IsRegisterPair());
  return HighRegisterFrom(out).IsLow();
}
#undef __
// Returns the label code for `instruction` should fall through to, replacing
// `final_label` with the successor's label when the next instruction is an
// unconditional goto — avoiding a branch to a branch.
vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
                                                   vixl32::Label* final_label) {
  DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
  DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());

  const HBasicBlock* const block = instruction->GetBlock();
  const HLoopInformation* const info = block->GetLoopInformation();
  HInstruction* const next = instruction->GetNext();

  if (next->IsGoto()) {
    // Do not skip the goto on a back edge whose loop has a suspend check —
    // that goto must still be reached so the suspend check runs.
    const bool back_edge_with_suspend_check =
        info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck();
    if (!back_edge_with_suspend_check) {
      final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
    }
  }

  return final_label;
}
// Constructs the ARM VIXL code generator: registers the ARM core/FPU
// callee-save sets with the base CodeGenerator and allocates all patch and
// literal tables on the graph's arena allocator.
CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
                                           const CompilerOptions& compiler_options,
                                           OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCoreRegisters,
                    kNumberOfSRegisters,
                    kNumberOfRegisterPairs,
                    kCoreCalleeSaves.GetList(),
                    ComputeSRegisterListMask(kFpuCalleeSaves),
                    compiler_options,
                    stats),
      block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      uint32_literals_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(StringReferenceValueComparator(),
                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(TypeReferenceValueComparator(),
                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Always save the LR register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(LR));
  // Give D30 and D31 as scratch register to VIXL. The register allocator only works on
  // S0-S31, which alias to D0-D15.
  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
}
// Emits the jump table's 32-bit offset entries into the code buffer at the
// current position, binding `table_start_` at the table's base.
void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure
  // the underlying code buffer is aligned for the 32-bit entries and that we
  // have reserved room for a jump table of the right size.
  // NOTE(review): the Align() call below had been swallowed into the comment
  // text, leaving the table potentially misaligned; restored it as code.
  codegen->GetVIXLAssembler()->GetBuffer().Align();
  ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
                         num_entries * sizeof(int32_t),
                         CodeBufferCheckScope::kMaximumSize);
  // TODO(VIXL): Check that using lower case bind is fine here.
  codegen->GetVIXLAssembler()->bind(&table_start_);
  for (uint32_t i = 0; i < num_entries; i++) {
    codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
  }
}
// Patches each jump-table entry with the final offset of its target block
// relative to the start of the table.
void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
  const uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t entry = 0; entry < num_entries; entry++) {
    vixl32::Label* const target_label = codegen->GetLabelOf(successors[entry]);
    DCHECK(target_label->IsBound());
    int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    // When doing BX to address we need to have lower bit set to 1 in T32.
    if (codegen->GetVIXLAssembler()->IsUsingT32()) {
      jump_offset++;
    }
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    bb_addresses_[entry].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
  }
}
// Patches every emitted jump table with the final offsets of its targets.
void CodeGeneratorARMVIXL::FixJumpTables() {
  for (auto& jump_table : jump_tables_) {
    jump_table->FixTable(this);
  }
}
#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
// Finalizes code generation: fixes up jump tables, emits the JIT Baker read
// barrier slow path thunks, finalizes the assembler, and (in debug builds)
// verifies that each Baker read barrier patch site contains the expected
// instruction encoding next to the patched location.
void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
  FixJumpTables();

  // Emit JIT baker read barrier slow paths.
  DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
    uint32_t encoded_data = entry.first;  // Map key: the barrier's encoded custom data.
    vixl::aarch32::Label* slow_path_entry = &entry.second.label;
    __ Bind(slow_path_entry);
    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
  }

  GetAssembler()->FinalizeCode();
  CodeGenerator::Finalize(allocator);

  // Verify Baker read barrier linker patches.
  if (kIsDebugBuild) {
    ArrayRef<const uint8_t> code = allocator->GetMemory();
    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
      DCHECK(info.label.IsBound());
      uint32_t literal_offset = info.label.GetLocation();
      DCHECK_ALIGNED(literal_offset, 2u);

      // Reads one 16-bit instruction halfword; bytes are stored little-endian.
      auto GetInsn16 = [&code](uint32_t offset) {
        DCHECK_ALIGNED(offset, 2u);
        return (static_cast<uint32_t>(code[offset + 0]) << 0) +
               (static_cast<uint32_t>(code[offset + 1]) << 8);
      };
      // Reads a 32-bit T32 instruction: the first halfword is the most
      // significant half of the encoding.
      auto GetInsn32 = [=](uint32_t offset) {
        return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
      };

      uint32_t encoded_data = info.custom_data;
      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
      // Check that the next instruction matches the expected LDR.
      switch (kind) {
        case BakerReadBarrierKind::kField: {
          BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
          if (width == BakerReadBarrierWidth::kWide) {
            DCHECK_GE(code.size() - literal_offset, 8u);
            uint32_t next_insn = GetInsn32(literal_offset + 4u);
            // LDR (immediate), encoding T3, with correct base_reg.
            CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
            const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
            CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
          } else {
            DCHECK_GE(code.size() - literal_offset, 6u);
            uint32_t next_insn = GetInsn16(literal_offset + 4u);
            // LDR (immediate), encoding T1, with correct base_reg.
            CheckValidReg(next_insn & 0x7u);  // Check destination register.
            const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
            CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
          }
          break;
        }
        case BakerReadBarrierKind::kArray: {
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn32(literal_offset + 4u);
          // LDR (register) with correct base_reg, S=1 and option=011
          // (i.e. LDR Rt, [Rn, Rm, LSL #2]).
          CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
          CheckValidReg(next_insn & 0xf);  // Check index register
          break;
        }
        case BakerReadBarrierKind::kGcRoot: {
          // For GC roots, the patched instruction follows the root load, so
          // verify the *previous* instruction instead of the next one.
          BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
          if (width == BakerReadBarrierWidth::kWide) {
            DCHECK_GE(literal_offset, 4u);
            uint32_t prev_insn = GetInsn32(literal_offset - 4u);
            // LDR (immediate), encoding T3, with correct root_reg.
            const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
            CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
          } else {
            DCHECK_GE(literal_offset, 2u);
            uint32_t prev_insn = GetInsn16(literal_offset - 2u);
            // LDR (immediate), encoding T1, with correct root_reg.
            const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
            CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
          }
          break;
        }
        case BakerReadBarrierKind::kUnsafeCas: {
          DCHECK_GE(literal_offset, 4u);
          uint32_t prev_insn = GetInsn32(literal_offset - 4u);
          // ADD (register), encoding T3, with correct root_reg.
          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8));
          break;
        }
        default:
          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
          UNREACHABLE();
      }
    }
  }
}
void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
// Stack register, LR and PC are always reserved.
blocked_core_registers_[SP] = true;
blocked_core_registers_[LR] = true;
blocked_core_registers_[PC] = true;
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Reserve marking register.
blocked_core_registers_[MR] = true;
}
// Reserve thread register.
blocked_core_registers_[TR] = true;
// Reserve temp register.
blocked_core_registers_[IP] = true;
if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
// now, just block them.
for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
++i) {
blocked_fpu_registers_[i] = true;
}
}
}
// Caches the assembler and code generator pointers used by the
// per-instruction visitor methods of this class.
InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
                                                                 CodeGeneratorARMVIXL* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}
void CodeGeneratorARMVIXL::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
// There is no easy instruction to restore just the PC on thumb2. We spill and
// restore another arbitrary register.
core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister.GetCode());
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
// We use vpush and vpop for saving and restoring floating point registers, which take
// a SRegister and the number of registers to save/restore after that SRegister. We
// therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
// but in the range.
if (fpu_spill_mask_ != 0) {
uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
fpu_spill_mask_ |= (1 << i);
}
}
}
// Emits the method prologue: optional hotness counter update, implicit stack
// overflow check, callee-save spills (core registers then FP registers),
// frame allocation, and storing the current ArtMethod* at SP.
void CodeGeneratorARMVIXL::GenerateFrameEntry() {
  bool skip_overflow_check =
      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
  __ Bind(&frame_entry_label_);

  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    // Bump the 16-bit hotness counter stored in the ArtMethod.
    UseScratchRegisterScope temps(GetVIXLAssembler());
    vixl32::Register temp = temps.Acquire();
    __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
    __ Add(temp, temp, 1);
    __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
  }

  if (HasEmptyFrame()) {
    // No spills and no frame to set up.
    return;
  }

  if (!skip_overflow_check) {
    // Implicit stack overflow check: probe the lowest address the method may
    // touch; a fault here is handled by the runtime.
    // Using r4 instead of IP saves 2 bytes.
    UseScratchRegisterScope temps(GetVIXLAssembler());
    vixl32::Register temp;
    // TODO: Remove this check when R4 is made a callee-save register
    // in ART compiled code (b/72801708). Currently we need to make
    // sure r4 is not blocked, e.g. in special purpose
    // TestCodeGeneratorARMVIXL; also asserting that r4 is available
    // here.
    if (!blocked_core_registers_[R4]) {
      for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
        DCHECK(!reg.Is(r4));
      }
      DCHECK(!kCoreCalleeSaves.Includes(r4));
      temp = r4;
    } else {
      temp = temps.Acquire();
    }
    __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
    // The load must immediately precede RecordPcInfo.
    ExactAssemblyScope aas(GetVIXLAssembler(),
                           vixl32::kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);
    __ ldr(temp, MemOperand(temp));
    RecordPcInfo(nullptr, 0);
  }

  // Spill the core callee-saves and record their CFI offsets.
  __ Push(RegisterList(core_spill_mask_));
  GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
  GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
                                         0,
                                         core_spill_mask_,
                                         kArmWordSize);
  if (fpu_spill_mask_ != 0) {
    uint32_t first = LeastSignificantBit(fpu_spill_mask_);
    // Check that list is contiguous.
    DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
    __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
    GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
    GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize);
  }

  // Allocate the remainder of the frame beyond the spill area.
  int adjust = GetFrameSize() - FrameEntrySpillSize();
  __ Sub(sp, sp, adjust);
  GetAssembler()->cfi().AdjustCFAOffset(adjust);

  // Save the current method if we need it. Note that we do not
  // do this in HCurrentMethod, as the instruction might have been removed
  // in the SSA graph.
  if (RequiresCurrentMethod()) {
    GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
  }
  if (GetGraph()->HasShouldDeoptimizeFlag()) {
    UseScratchRegisterScope temps(GetVIXLAssembler());
    vixl32::Register temp = temps.Acquire();
    // Initialize should_deoptimize flag to 0.
    __ Mov(temp, 0);
    GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
  }

  MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
}
// Emits the method epilogue: deallocates the frame, restores the FP
// callee-saves, and returns by popping the saved LR slot directly into PC.
void CodeGeneratorARMVIXL::GenerateFrameExit() {
  if (HasEmptyFrame()) {
    // Nothing was pushed in the prologue; just return.
    __ Bx(lr);
    return;
  }
  GetAssembler()->cfi().RememberState();

  // Deallocate the non-spill part of the frame.
  int adjust = GetFrameSize() - FrameEntrySpillSize();
  __ Add(sp, sp, adjust);
  GetAssembler()->cfi().AdjustCFAOffset(-adjust);

  if (fpu_spill_mask_ != 0) {
    uint32_t first = LeastSignificantBit(fpu_spill_mask_);
    // Check that list is contiguous.
    DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
    __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
    GetAssembler()->cfi().AdjustCFAOffset(
        -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
    GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
  }

  // Pop LR into PC to return.
  DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
  uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
  __ Pop(RegisterList(pop_mask));
  GetAssembler()->cfi().RestoreState();
  GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}
// Binds the basic block's label to the current position in the code stream.
void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
  vixl32::Label* block_label = GetLabelOf(block);
  __ Bind(block_label);
}
// Returns the location (core/FP register or stack slot) for the next argument
// of `type` under the managed ARM calling convention, advancing the internal
// indices. Note that `stack_index_` is advanced for every argument — even
// ones passed in registers — so that later stack-passed arguments land in the
// right slots. 64-bit core arguments use aligned register pairs, and FP
// arguments are tracked independently of core ones.
Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
  switch (type) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      uint32_t index = gp_index_++;
      uint32_t stack_index = stack_index_++;
      if (index < calling_convention.GetNumberOfRegisters()) {
        return LocationFrom(calling_convention.GetRegisterAt(index));
      } else {
        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
      }
    }

    case DataType::Type::kInt64: {
      uint32_t index = gp_index_;
      uint32_t stack_index = stack_index_;
      // A 64-bit value consumes two argument positions.
      gp_index_ += 2;
      stack_index_ += 2;
      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
        if (calling_convention.GetRegisterAt(index).Is(r1)) {
          // Skip R1, and use R2_R3 instead.
          gp_index_++;
          index++;
        }
      }
      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
        // The pair must be consecutive registers (e.g. R2_R3).
        DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
                  calling_convention.GetRegisterAt(index + 1).GetCode());
        return LocationFrom(calling_convention.GetRegisterAt(index),
                            calling_convention.GetRegisterAt(index + 1));
      } else {
        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
      }
    }

    case DataType::Type::kFloat32: {
      uint32_t stack_index = stack_index_++;
      if (float_index_ % 2 == 0) {
        // At an even S-register index, jump past any registers that were
        // already claimed by double arguments.
        float_index_ = std::max(double_index_, float_index_);
      }
      if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
        return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
      } else {
        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
      }
    }

    case DataType::Type::kFloat64: {
      // Doubles must start at an even S-register index.
      double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
      uint32_t stack_index = stack_index_;
      stack_index_ += 2;
      if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
        uint32_t index = double_index_;
        double_index_ += 2;
        Location result = LocationFrom(
          calling_convention.GetFpuRegisterAt(index),
          calling_convention.GetFpuRegisterAt(index + 1));
        DCHECK(ExpectedPairLayout(result));
        return result;
      } else {
        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
      }
    }

    case DataType::Type::kUint32:
    case DataType::Type::kUint64:
    case DataType::Type::kVoid:
      LOG(FATAL) << "Unexpected parameter type " << type;
      UNREACHABLE();
  }
  return Location::NoLocation();
}
// Returns where a value of `type` is returned by the managed ARM calling
// convention: core values in r0 (r0/r1 for 64-bit), FP values in s0
// (s0/s1 for doubles), and no location for void.
Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
  switch (type) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kUint32:
    case DataType::Type::kInt32:
      return LocationFrom(r0);
    case DataType::Type::kFloat32:
      return LocationFrom(s0);
    case DataType::Type::kUint64:
    case DataType::Type::kInt64:
      return LocationFrom(r0, r1);
    case DataType::Type::kFloat64:
      return LocationFrom(s0, s1);
    case DataType::Type::kVoid:
      return Location::NoLocation();
  }
  UNREACHABLE();
}
// The current ArtMethod* is always passed in the dedicated method register.
Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
  return LocationFrom(kMethodRegister);
}
// Moves a 32-bit value between two locations (core register, S register, or
// 32-bit stack slot). The source is left unchanged.
void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
  if (source.Equals(destination)) {
    return;  // Nothing to move.
  }

  if (destination.IsRegister()) {
    if (source.IsRegister()) {
      __ Mov(RegisterFrom(destination), RegisterFrom(source));
    } else if (source.IsFpuRegister()) {
      __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
    } else {
      GetAssembler()->LoadFromOffset(
          kLoadWord, RegisterFrom(destination), sp, source.GetStackIndex());
    }
    return;
  }

  if (destination.IsFpuRegister()) {
    if (source.IsRegister()) {
      __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
    } else if (source.IsFpuRegister()) {
      __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
    } else {
      GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
    }
    return;
  }

  // Remaining case: the destination is a stack slot.
  DCHECK(destination.IsStackSlot()) << destination;
  if (source.IsRegister()) {
    GetAssembler()->StoreToOffset(
        kStoreWord, RegisterFrom(source), sp, destination.GetStackIndex());
  } else if (source.IsFpuRegister()) {
    GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
  } else {
    // Stack-to-stack move goes through a scratch core register.
    DCHECK(source.IsStackSlot()) << source;
    UseScratchRegisterScope temps(GetVIXLAssembler());
    vixl32::Register temp = temps.Acquire();
    GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
    GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
  }
}
// Materializes a 32-bit constant into the given (register) location.
void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  vixl32::Register destination = RegisterFrom(location);
  __ Mov(destination, value);
}
// Moves a value of `dst_type` from `src` to `dst` by delegating to the
// parallel move resolver, which knows how to emit every location pairing.
// TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
// `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(src, dst, dst_type, /* instruction= */ nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}
// Registers `location` as a temp in `locations`. Register pairs contribute
// two temps (low half first); other location kinds are not supported.
void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
  if (location.IsRegister()) {
    locations->AddTemp(location);
    return;
  }
  if (location.IsRegisterPair()) {
    locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
    locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
    return;
  }
  UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
}
// Calls a quick runtime entrypoint: loads its address from the current
// Thread (tr) into lr, calls it with `blx`, and records PC info right after
// the call when the entrypoint requires a stack map.
void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                         HInstruction* instruction,
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  // Load the entrypoint function pointer from the Thread object.
  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()));
  // Ensure the pc position is recorded immediately after the `blx` instruction.
  // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
  ExactAssemblyScope aas(GetVIXLAssembler(),
                         vixl32::k16BitT32InstructionSizeInBytes,
                         CodeBufferCheckScope::kExactSize);
  __ blx(lr);
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}
// Calls a runtime entrypoint at the given Thread offset without recording
// PC info; the validation call checks that this is permissible for the
// instruction/slow path combination.
void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                               HInstruction* instruction,
                                                               SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  __ Ldr(lr, MemOperand(tr, entry_point_offset));
  __ Blx(lr);
}
// Emits control flow for an unconditional jump (HGoto/HTryBoundary): updates
// the hotness counter and emits a suspend check on back edges, then branches
// to the successor unless it immediately follows in the code layout.
void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
  if (successor->IsExitBlock()) {
    DCHECK(got->GetPrevious()->AlwaysThrows());
    return;  // no code needed
  }

  HBasicBlock* block = got->GetBlock();
  HInstruction* previous = got->GetPrevious();
  HLoopInformation* info = block->GetLoopInformation();

  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
      // Save kMethodRegister, reload the current ArtMethod* from the frame
      // (the push moved SP down by one word, hence the kArmWordSize offset),
      // bump its hotness counter, and restore the register.
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register temp = temps.Acquire();
      __ Push(vixl32::Register(kMethodRegister));
      GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
      __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
      __ Add(temp, temp, 1);
      __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
      __ Pop(vixl32::Register(kMethodRegister));
    }
    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
    return;
  }

  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
    codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
  }
  // Skip the branch when the successor is the fall-through block.
  if (!codegen_->GoesToNextBlock(block, successor)) {
    __ B(codegen_->GetLabelOf(successor));
  }
}
// HGoto has no operands and produces no value, so it needs no LocationSummary.
void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
  got->SetLocations(nullptr);
}
// Emits the unconditional jump to the goto's single successor.
void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
  HandleGoto(got, got->GetSuccessor());
}
// HTryBoundary has no operands, so it needs no LocationSummary.
void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
  try_boundary->SetLocations(nullptr);
}
// Emits the jump for a try boundary's normal-flow successor; no code is
// needed when the boundary falls through to the exit block.
void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
  if (successor->IsExitBlock()) {
    return;
  }
  HandleGoto(try_boundary, successor);
}
// HExit has no operands, so it needs no LocationSummary.
void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
  exit->SetLocations(nullptr);
}
// The exit block generates no code of its own.
void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target,
vixl32::Label* false_target,
bool is_far_target) {
if (true_target == false_target) {
DCHECK(true_target != nullptr);
__ B(true_target);
return;
}
vixl32::Label*