MIPS32: Saves 128-bit vector registers along SuspendCheckSlowPath
We need to save the full 128 bits of vector register data. This is done
only for vector registers that are live, so the overhead is not too large.
Test: mma test-art-host-gtest
Test: ./testrunner.py --optimizing --target in QEMU (MIPS)
Change-Id: I0f792e9c98011be3e24d5fad35a8244faafcb9a0
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 4c4d97b..abe1d70 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -434,10 +434,13 @@
: SlowPathCodeMIPS(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD.
mips_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD.
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -1448,6 +1451,11 @@
__ Bind(GetLabelOf(block));
}
+VectorRegister VectorRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister());
+ return static_cast<VectorRegister>(location.AsFpuRegister<FRegister>());
+}
+
void CodeGeneratorMIPS::MoveLocation(Location destination,
Location source,
Primitive::Type dst_type) {
@@ -1495,12 +1503,19 @@
__ Mtc1(src_low, dst);
__ MoveToFpuHigh(src_high, dst);
} else if (source.IsFpuRegister()) {
- if (Primitive::Is64BitType(dst_type)) {
- __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ if (GetGraph()->HasSIMD()) {
+ __ MoveV(VectorRegisterFrom(destination),
+ VectorRegisterFrom(source));
} else {
- DCHECK_EQ(dst_type, Primitive::kPrimFloat);
- __ MovS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ if (Primitive::Is64BitType(dst_type)) {
+ __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ } else {
+ DCHECK_EQ(dst_type, Primitive::kPrimFloat);
+ __ MovS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ }
}
+ } else if (source.IsSIMDStackSlot()) {
+ __ LoadQFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
} else if (source.IsDoubleStackSlot()) {
DCHECK(Primitive::Is64BitType(dst_type));
__ LoadDFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
@@ -1509,6 +1524,14 @@
DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination;
__ LoadSFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
}
+ } else if (destination.IsSIMDStackSlot()) {
+ if (source.IsFpuRegister()) {
+ __ StoreQToOffset(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex());
+ } else {
+ DCHECK(source.IsSIMDStackSlot());
+ __ LoadQFromOffset(FTMP, SP, source.GetStackIndex());
+ __ StoreQToOffset(FTMP, SP, destination.GetStackIndex());
+ }
} else if (destination.IsDoubleStackSlot()) {
int32_t dst_offset = destination.GetStackIndex();
if (source.IsRegisterPair()) {
@@ -1875,13 +1898,21 @@
}
size_t CodeGeneratorMIPS::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ StoreDToOffset(FRegister(reg_id), SP, stack_index);
- return kMipsDoublewordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ StoreQToOffset(FRegister(reg_id), SP, stack_index);
+ } else {
+ __ StoreDToOffset(FRegister(reg_id), SP, stack_index);
+ }
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorMIPS::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ LoadDFromOffset(FRegister(reg_id), SP, stack_index);
- return kMipsDoublewordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ LoadQFromOffset(FRegister(reg_id), SP, stack_index);
+ } else {
+ __ LoadDFromOffset(FRegister(reg_id), SP, stack_index);
+ }
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorMIPS::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -8216,7 +8247,11 @@
void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c259ea3..1afa1b9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -61,6 +61,8 @@
class CodeGeneratorMIPS;
+VectorRegister VectorRegisterFrom(Location location);
+
class InvokeDexCallingConvention : public CallingConvention<Register, FRegister> {
public:
InvokeDexCallingConvention()
@@ -372,7 +374,11 @@
size_t GetWordSize() const OVERRIDE { return kMipsWordSize; }
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMipsDoublewordSize; }
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ return GetGraph()->HasSIMD()
+ ? 2 * kMipsDoublewordSize // 16 bytes for each spill.
+ : 1 * kMipsDoublewordSize; // 8 bytes for each spill.
+ }
uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
return assembler_.GetLabelLocation(GetLabelOf(block));
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5fb8755..232241c 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1289,6 +1289,11 @@
SP,
source.GetStackIndex());
}
+ } else if (source.IsSIMDStackSlot()) {
+ __ LoadFpuFromOffset(kLoadQuadword,
+ destination.AsFpuRegister<FpuRegister>(),
+ SP,
+ source.GetStackIndex());
} else if (source.IsConstant()) {
// Move to GPR/FPR from constant
GpuRegister gpr = AT;
@@ -1329,12 +1334,17 @@
}
} else if (source.IsFpuRegister()) {
if (destination.IsFpuRegister()) {
- // Move to FPR from FPR
- if (dst_type == Primitive::kPrimFloat) {
- __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+ if (GetGraph()->HasSIMD()) {
+ __ MoveV(VectorRegisterFrom(destination),
+ VectorRegisterFrom(source));
} else {
- DCHECK_EQ(dst_type, Primitive::kPrimDouble);
- __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+ // Move to FPR from FPR
+ if (dst_type == Primitive::kPrimFloat) {
+ __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+ } else {
+ DCHECK_EQ(dst_type, Primitive::kPrimDouble);
+ __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+ }
}
} else {
DCHECK(destination.IsRegister());
@@ -1345,6 +1355,23 @@
}
}
}
+ } else if (destination.IsSIMDStackSlot()) {
+ if (source.IsFpuRegister()) {
+ __ StoreFpuToOffset(kStoreQuadword,
+ source.AsFpuRegister<FpuRegister>(),
+ SP,
+ destination.GetStackIndex());
+ } else {
+ DCHECK(source.IsSIMDStackSlot());
+ __ LoadFpuFromOffset(kLoadQuadword,
+ FTMP,
+ SP,
+ source.GetStackIndex());
+ __ StoreFpuToOffset(kStoreQuadword,
+ FTMP,
+ SP,
+ destination.GetStackIndex());
+ }
} else { // The destination is not a register. It must be a stack slot.
DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
if (source.IsRegister() || source.IsFpuRegister()) {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index b620973..c94cc93 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -59,6 +59,8 @@
class CodeGeneratorMIPS64;
+VectorRegister VectorRegisterFrom(Location location);
+
class InvokeDexCallingConvention : public CallingConvention<GpuRegister, FpuRegister> {
public:
InvokeDexCallingConvention()