/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "code_generator_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
using namespace vixl::aarch64; // NOLINT(build/namespaces)
namespace art {
namespace arm64 {
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::StackOperandFrom;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::ZRegisterFrom;
using helpers::XRegisterFrom;
#define __ GetVIXLAssembler()->
// Returns whether the value of the constant can be directly encoded into the instruction as
// an immediate.
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
if (instr->IsVecReplicateScalar()) {
if (constant->IsLongConstant()) {
return false;
} else if (constant->IsFloatConstant()) {
return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
} else if (constant->IsDoubleConstant()) {
return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
}
// TODO: Make use of shift part of DUP instruction.
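    // DUP (immediate) takes a signed 8-bit immediate with an optional left shift by 8;
    // only the unshifted form is checked for here.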
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
return IsInt<8>(value);
}
return false;
}
// Returns
// - constant location - if 'constant' is an actual constant and its value can be
// encoded into the instruction.
// - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
if (constant->IsConstant()
&& SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant->AsConstant());
}
return Location::RequiresRegister();
}
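// Full-width vectors are expected: the element size times the element count of every vector
// operation must match the SIMD register width configured for the code generator.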
void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
codegen_->GetSIMDRegisterWidth());
}
void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
HInstruction* input = instruction->InputAt(0);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
if (input->IsConstant() &&
SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
locations->SetOut(Location::RequiresFpuRegister());
} else {
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
Location src_loc = locations->InAt(0);
const ZRegister dst = ZRegisterFrom(locations->Out());
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
if (src_loc.IsConstant()) {
__ Dup(dst.VnB(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
}
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
if (src_loc.IsConstant()) {
__ Dup(dst.VnH(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
}
break;
case DataType::Type::kInt32:
if (src_loc.IsConstant()) {
__ Dup(dst.VnS(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
}
break;
case DataType::Type::kInt64:
if (src_loc.IsConstant()) {
__ Dup(dst.VnD(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.VnD(), XRegisterFrom(src_loc));
}
break;
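    // For floating-point types, either encode the constant directly with FDUP or broadcast
    // lane 0 of the source vector register with an indexed DUP.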
case DataType::Type::kFloat32:
if (src_loc.IsConstant()) {
__ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
} else {
__ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
}
break;
case DataType::Type::kFloat64:
if (src_loc.IsConstant()) {
__ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
} else {
__ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
}
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const VRegister src = VRegisterFrom(locations->InAt(0));
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
__ Umov(OutputRegister(instruction), src.V4S(), 0);
break;
case DataType::Type::kInt64:
__ Umov(OutputRegister(instruction), src.V2D(), 0);
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
instruction->IsVecNot() ? Location::kOutputOverlap
: Location::kNoOutputOverlap);
break;
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const VRegister dst = DRegisterFrom(locations->Out());
const PRegister p_reg = LoopPReg();
ValidateVectorLength(instruction);
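  // The reductions below are predicated: only lanes selected by the loop predicate are summed,
  // and the scalar result is written to the destination register.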
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Saddv(dst.S(), p_reg, src.VnS());
break;
default:
LOG(FATAL) << "Unsupported SIMD instruction";
UNREACHABLE();
}
break;
case DataType::Type::kInt64:
switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Uaddv(dst.D(), p_reg, src.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD instruction";
UNREACHABLE();
}
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
DataType::Type from = instruction->GetInputType();
DataType::Type to = instruction->GetResultType();
ValidateVectorLength(instruction);
if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
__ Scvtf(dst.VnS(), p_reg, src.VnS());
} else {
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Neg(dst.VnB(), p_reg, src.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Neg(dst.VnH(), p_reg, src.VnH());
break;
case DataType::Type::kInt32:
__ Neg(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kInt64:
__ Neg(dst.VnD(), p_reg, src.VnD());
break;
case DataType::Type::kFloat32:
__ Fneg(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kFloat64:
__ Fneg(dst.VnD(), p_reg, src.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt8:
__ Abs(dst.VnB(), p_reg, src.VnB());
break;
case DataType::Type::kInt16:
__ Abs(dst.VnH(), p_reg, src.VnH());
break;
case DataType::Type::kInt32:
__ Abs(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kInt64:
__ Abs(dst.VnD(), p_reg, src.VnD());
break;
case DataType::Type::kFloat32:
__ Fabs(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kFloat64:
__ Fabs(dst.VnD(), p_reg, src.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool: // special case boolean-not
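      // Booleans are materialized as 0/1 bytes, so flipping the low bit (x ^ 1) implements the
      // logical NOT; first materialize 1 in every byte lane.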
__ Dup(dst.VnB(), 1);
__ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
break;
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Not(dst.VnB(), p_reg, src.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Not(dst.VnH(), p_reg, src.VnH());
break;
case DataType::Type::kInt32:
__ Not(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kInt64:
__ Not(dst.VnD(), p_reg, src.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
__ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
__ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
case DataType::Type::kFloat32:
__ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
break;
case DataType::Type::kFloat64:
__ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
__ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
__ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
case DataType::Type::kFloat32:
__ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kFloat64:
__ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
__ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
__ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
case DataType::Type::kFloat32:
__ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
break;
case DataType::Type::kFloat64:
__ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
// Note: VIXL guarantees StrictNaNPropagation for Fdiv.
switch (instruction->GetPackedType()) {
case DataType::Type::kFloat32:
__ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kFloat64:
__ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
// TODO: Allow constants supported by BIC (vector, immediate).
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
__ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
__ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
// TODO: Use BIC (vector, register).
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
__ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
__ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
__ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
__ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
break;
case DataType::Type::kInt32:
__ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
break;
case DataType::Type::kInt64:
__ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
break;
case DataType::Type::kInt32:
__ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
break;
case DataType::Type::kInt64:
__ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
break;
case DataType::Type::kInt32:
__ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
break;
case DataType::Type::kInt64:
__ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DCHECK_EQ(2u, instruction->InputCount()); // only one input currently implemented + predicate.
HInstruction* input = instruction->InputAt(0);
bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister z_dst = ZRegisterFrom(locations->Out());
DCHECK_EQ(2u, instruction->InputCount()); // only one input currently implemented + predicate.
// Zero out all other elements first.
__ Dup(z_dst.VnB(), 0);
const VRegister dst = VRegisterFrom(locations->Out());
// Shorthand for any type of zero.
if (IsZeroBitPattern(instruction->InputAt(0))) {
return;
}
ValidateVectorLength(instruction);
// Set required elements.
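  // Lane 0 is written through the NEON view of the register, which aliases the low 128 bits
  // of the Z register zeroed above.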
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
break;
case DataType::Type::kInt32:
__ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
break;
case DataType::Type::kInt64:
__ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}
// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector multiply-accumulate instructions are not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
HVecMultiplyAccumulate* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const ZRegister acc = ZRegisterFrom(locations->InAt(0));
const ZRegister left = ZRegisterFrom(locations->InAt(1));
const ZRegister right = ZRegisterFrom(locations->InAt(2));
const PRegisterM p_reg = LoopPReg().Merging();
DCHECK(locations->InAt(0).Equals(locations->Out()));
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
} else {
__ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
}
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      }
break;
case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      }
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
UNREACHABLE();
}
void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
locations->AddTemp(Location::RequiresFpuRegister());
}
void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
DCHECK(locations->InAt(0).Equals(locations->Out()));
const ZRegister acc = ZRegisterFrom(locations->InAt(0));
const ZRegister left = ZRegisterFrom(locations->InAt(1));
const ZRegister right = ZRegisterFrom(locations->InAt(2));
const PRegisterM p_reg = LoopPReg().Merging();
HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
HVecOperation::ToSignedType(b->GetPackedType()));
DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
ValidateVectorLength(instruction);
size_t inputs_data_size = DataType::Size(a->GetPackedType());
switch (inputs_data_size) {
case 1u: {
UseScratchRegisterScope temps(GetVIXLAssembler());
const ZRegister tmp0 = temps.AcquireZ();
const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));
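      // SDOT/UDOT are unpredicated, so zero out the inactive lanes of both operands (SEL with
      // a zeroed register) to keep them from contributing to the accumulator.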
__ Dup(tmp1.VnB(), 0u);
__ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
__ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
if (instruction->IsZeroExtending()) {
__ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
} else {
__ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
}
break;
}
default:
LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
}
}
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
HVecMemoryOperation* instruction,
bool is_load) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (is_load) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
locations->SetInAt(2, Location::RequiresFpuRegister());
}
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}
void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
size_t size = DataType::Size(instruction->GetPackedType());
const ZRegister reg = ZRegisterFrom(locations->Out());
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
const PRegisterZ p_reg = LoopPReg().Zeroing();
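  // The loads are governed by the loop predicate with zeroing: inactive lanes of the
  // destination are cleared.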
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
case DataType::Type::kUint16:
__ Ld1h(reg.VnH(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Ld1b(reg.VnB(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
__ Ld1w(reg.VnS(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
__ Ld1d(reg.VnD(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}
void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
size_t size = DataType::Size(instruction->GetPackedType());
const ZRegister reg = ZRegisterFrom(locations->InAt(2));
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
const PRegisterZ p_reg = LoopPReg().Zeroing();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ St1b(reg.VnB(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ St1h(reg.VnH(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
__ St1w(reg.VnS(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
__ St1d(reg.VnD(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DCHECK(instruction->InputAt(0)->IsIntConstant());
locations->SetInAt(0, Location::NoLocation());
locations->SetOut(Location::NoLocation());
}
void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
const PRegister p_reg = LoopPReg();
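  // PTRUE with the SVE_ALL pattern activates every lane of the loop governing predicate for
  // the given element size.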
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
__ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
__ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
__ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
__ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to work around the absence of support for vector predicates in register allocation.
//
// Semantically, the out location of this instruction and predicate inputs locations of
// its users should be a fixed predicate register (similar to
// Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
// SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it
// to the RA.
//
  // To make the RA happy, Location::NoLocation() is used for all the vector instructions'
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to the RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
//
// Using a core register as a hack is the easiest way to tackle this problem. The RA will
// block one core register for the loop without actually using it; this should not be
// a performance issue as a SIMD loop operates mainly on SIMD registers.
//
// TODO: Support SIMD types in register allocator.
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
  // The current implementation of predicated loop execution only supports the kLO condition.
DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
Register left = InputRegisterAt(instruction, 0);
Register right = InputRegisterAt(instruction, 1);
DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);
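  // The register width divided by the vector length gives the element size in bytes, which
  // selects the WHILELO lane size.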
switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
case 1u:
__ Whilelo(LoopPReg().VnB(), left, right);
break;
case 2u:
__ Whilelo(LoopPReg().VnH(), left, right);
break;
case 4u:
__ Whilelo(LoopPReg().VnS(), left, right);
break;
case 8u:
__ Whilelo(LoopPReg().VnD(), left, right);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::NoLocation());
// Result of the operation - a boolean value in a core register.
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
Register reg = OutputRegister(instruction);
// Currently VecPredCondition is only used as part of vectorized loop check condition
// evaluation.
DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
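  // The condition flags are assumed to still hold the result of the WHILELO that defined the
  // predicate input: N is set when the first lane is active, so PL (N clear) yields kNFirst.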
__ Cset(reg, pl);
}
Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
vixl::aarch64::UseScratchRegisterScope* scope) {
return LocationFrom(scope->AcquireZ());
}
void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
vixl::aarch64::UseScratchRegisterScope* scope) {
scope->Release(ZRegisterFrom(loc));
}
void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
Location source) {
__ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}
void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
Location source) {
__ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}
void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
Location source) {
DCHECK(destination.IsSIMDStackSlot());
if (source.IsFpuRegister()) {
__ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
} else {
DCHECK(source.IsSIMDStackSlot());
UseScratchRegisterScope temps(GetVIXLAssembler());
if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation, only when there are cycles in the ParallelMoveResolver graph.
const Register temp = temps.AcquireX();
DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
// Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
// when copying a stack slot.
for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
offset < e;
offset += kArm64WordSize) {
__ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
__ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
}
} else {
const ZRegister temp = temps.AcquireZ();
__ Ldr(temp, SveStackOperandFrom(source));
__ Str(temp, SveStackOperandFrom(destination));
}
}
}
template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
LocationSummary* locations,
int64_t spill_offset) {
const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
codegen->GetNumberOfCoreRegisters(),
fp_spills,
codegen->GetNumberOfFloatingPointRegisters()));
MacroAssembler* masm = codegen->GetVIXLAssembler();
Register base = masm->StackPointer();
CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
int64_t core_spill_size = core_list.GetTotalSizeInBytes();
int64_t fp_spill_offset = spill_offset + core_spill_size;
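  // Core registers are spilled first; FP/SIMD registers follow them in the spill area.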
if (codegen->GetGraph()->HasSIMD()) {
if (is_save) {
masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
} else {
masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
}
codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
return;
}
  // Case when only D-registers need to be saved/restored - the graph has no SIMD.
DCHECK(!codegen->GetGraph()->HasSIMD());
DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
if (is_save) {
masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
} else {
masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
}
}
void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
int64_t spill_offset) {
SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}
void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
int64_t spill_offset) {
SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}
#undef __
} // namespace arm64
} // namespace art