JIT: Performance fix for const doubles
Some recent Arm processors take a performance hit when
creating a floating point double by loading it as a pair of singles.
Legacy code, written to support soft floating-point doubles held in a pair
of core registers, loaded double immediates in this way.
With this CL, we handle double immediates as a single unit.
(cherry-pick of c8129911e598ad0ca8d7b31012444ab6ce8bce45.)
Change-Id: Ic1512e34bfd233a6f5ffd58ce843965adbbad875
diff --git a/vm/compiler/codegen/arm/Assemble.cpp b/vm/compiler/codegen/arm/Assemble.cpp
index d1ecd97..7406d3e 100644
--- a/vm/compiler/codegen/arm/Assemble.cpp
+++ b/vm/compiler/codegen/arm/Assemble.cpp
@@ -952,6 +952,7 @@
if (lir->opcode == kThumbLdrPcRel ||
lir->opcode == kThumb2LdrPcRel12 ||
lir->opcode == kThumbAddPcRel ||
+ ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
intptr_t pc = (lir->generic.offset + 4) & ~3;
@@ -976,7 +977,7 @@
}
return kRetryHalve;
}
- if (lir->opcode == kThumb2Vldrs) {
+ if ((lir->opcode == kThumb2Vldrs) || (lir->opcode == kThumb2Vldrd)) {
lir->operands[2] = delta >> 2;
} else {
lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
diff --git a/vm/compiler/codegen/arm/CodegenCommon.cpp b/vm/compiler/codegen/arm/CodegenCommon.cpp
index 07f3ac7..5c02678 100644
--- a/vm/compiler/codegen/arm/CodegenCommon.cpp
+++ b/vm/compiler/codegen/arm/CodegenCommon.cpp
@@ -368,6 +368,25 @@
return NULL;
}
+/* Scan the literal list for an entry equal to valLo immediately followed by one equal to valHi; return the valLo entry, or NULL if no such adjacent pair exists */
+ArmLIR* scanLiteralPoolWide(LIR* dataTarget, int valLo, int valHi)
+{
+ bool lowMatch = false;
+ ArmLIR* lowTarget = NULL;
+ while (dataTarget) {
+ if (lowMatch && (((ArmLIR *)dataTarget)->operands[0] == valHi)) {
+ return lowTarget;
+ }
+ lowMatch = false;
+ if (((ArmLIR *) dataTarget)->operands[0] == valLo) {
+ lowMatch = true;
+ lowTarget = (ArmLIR *) dataTarget;
+ }
+ dataTarget = dataTarget->next;
+ }
+ return NULL;
+}
+
/*
* The following are building blocks to insert constants into the pool or
* instruction streams.
@@ -392,6 +411,14 @@
return NULL;
}
+/* Add a 64-bit constant to the literal pool or mixed with code as two words: valHi is added first, then valLo, and the result of adding valLo is returned */
+ArmLIR* addWideData(CompilationUnit* cUnit, LIR** constantListP,
+ int valLo, int valHi)
+{
+ addWordData(cUnit, constantListP, valHi);
+ return addWordData(cUnit, constantListP, valLo);
+}
+
static RegLocation inlinedTargetWide(CompilationUnit *cUnit, MIR *mir,
bool fpHint)
{
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.cpp b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
index 9c9ce13..c3c3712 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.cpp
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
@@ -53,7 +53,14 @@
{
int encodedImm = encodeImmSingle(value);
assert(SINGLEREG(rDest));
- if (encodedImm >= 0) {
+ if (value == 0) {
+ // TODO: we need better info about the target CPU. A vector exclusive or
+ // would probably be better here if we could rely on its existence.
+ // Load an immediate +2.0 (which encodes to 0)
+ newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, 0);
+ // +0.0 = +2.0 - +2.0
+ return newLIR3(cUnit, kThumb2Vsubs, rDest, rDest, rDest);
+ } else if (encodedImm >= 0) {
return newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, encodedImm);
}
ArmLIR *dataTarget = scanLiteralPool(cUnit->literalList, value, 0);
@@ -696,9 +703,34 @@
{
int encodedImm = encodeImmDouble(valLo, valHi);
ArmLIR *res;
- if (FPREG(rDestLo) && (encodedImm >= 0)) {
- res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
- encodedImm);
+ int targetReg = S2D(rDestLo, rDestHi);
+ if (FPREG(rDestLo)) {
+ if ((valLo == 0) && (valHi == 0)) {
+ // TODO: we need better info about the target CPU. A vector
+ // exclusive or would probably be better here if we could rely on
+ // its existence.
+ // Load an immediate +2.0 (which encodes to 0)
+ newLIR2(cUnit, kThumb2Vmovd_IMM8, targetReg, 0);
+ // +0.0 = +2.0 - +2.0
+ res = newLIR3(cUnit, kThumb2Vsubd, targetReg, targetReg, targetReg);
+ } else if (encodedImm >= 0) {
+ res = newLIR2(cUnit, kThumb2Vmovd_IMM8, targetReg, encodedImm);
+ } else {
+ ArmLIR* dataTarget = scanLiteralPoolWide(cUnit->literalList, valLo, valHi);
+ if (dataTarget == NULL) {
+ dataTarget = addWideData(cUnit, &cUnit->literalList, valLo, valHi);
+ }
+ ArmLIR *loadPcRel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
+ loadPcRel->opcode = kThumb2Vldrd;
+ loadPcRel->generic.target = (LIR *) dataTarget;
+ loadPcRel->operands[0] = targetReg;
+ loadPcRel->operands[1] = r15pc;
+ setupResourceMasks(loadPcRel);
+ setMemRefType(loadPcRel, true, kLiteral);
+ loadPcRel->aliasInfo = dataTarget->operands[0];
+ dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
+ res = loadPcRel;
+ }
} else {
res = loadConstantNoClobber(cUnit, rDestLo, valLo);
loadConstantNoClobber(cUnit, rDestHi, valHi);