Optimize more easy multiplications by constants.
Rather than make these changes in the libraries (*10 being a common case),
let's do them once and for all in the JIT.
The 2^n-1 case could be better if we generated RSB instructions, but the
current "fake" RSB is still better than a full multiply.
Thumb doesn't support reg/reg/reg/shift instructions, so we can't optimize
the "population count <= 2" cases (such as *10) there.
Tested on sholes, passion, and passion-running-sapphire (and visually
inspected to check we weren't trying to generate Thumb2 instructions there).
Also tested with the self-verifier.
diff --git a/vm/Android.mk b/vm/Android.mk
index 8e41da7..04e0910 100644
--- a/vm/Android.mk
+++ b/vm/Android.mk
@@ -56,7 +56,7 @@
# Enable assert and JIT tuning
include $(LOCAL_PATH)/ReconfigureDvm.mk
- # Enable asserion and JIT-tuning
+ # Enable assertions and JIT-tuning
LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \
-DWITH_JIT_TUNING -DEXIT_STATS
LOCAL_MODULE := libdvm_assert
@@ -66,7 +66,7 @@
# Enable assert and self-verification
include $(LOCAL_PATH)/ReconfigureDvm.mk
- # Enable asserion and JIT self-verification
+ # Enable assertions and JIT self-verification
LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \
-DWITH_SELF_VERIFICATION
LOCAL_MODULE := libdvm_sv
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index b329aed..7d26221 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -706,7 +706,7 @@
/*
* Each instance of this struct holds a pseudo or real LIR instruction:
- * - pesudo ones (eg labels and marks) and will be discarded by the assembler.
+ * - pseudo ones (eg labels and marks) and will be discarded by the assembler.
* - real ones will be assembled into Thumb instructions.
*
* Machine resources are encoded into a 64-bit vector, where the encodings are
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 09783f2..7f8ad9e 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -36,7 +36,7 @@
* s2e: src2 end bit position
* operands: number of operands (for sanity check purposes)
* name: mnemonic name
- * fmt: for pretty-prining
+ * fmt: for pretty-printing
*/
#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
k3, k3s, k3e, flags, name, fmt, size) \
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 4048c0a..f117de8 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1844,20 +1844,75 @@
return false;
}
-/* Positive power of 2? Return shiftCount if so, -1 if not */
-static int mulToShift(int val) {
- int shiftCount;
- if (val > 0 && ((val & (val - 1)) == 0)) {
- shiftCount = 0;
- val = ~val & (val - 1);
- while (val) {
- shiftCount++;
- val >>= 1;
- }
- } else {
- shiftCount = -1;
+static bool isPowerOfTwo(int x)
+{
+ return (x & (x - 1)) == 0;
+}
+
+// Returns true if no more than two bits are set in 'x'.
+static bool isPopCountLE2(unsigned int x)
+{
+ x &= x - 1;
+ return (x & (x - 1)) == 0;
+}
+
+// Returns the index of the lowest set bit in 'x'.
+static int lowestSetBit(unsigned int x) {
+ int bit_posn = 0;
+ while ((x & 0xf) == 0) {
+ bit_posn += 4;
+ x >>= 4;
}
- return shiftCount;
+ while ((x & 1) == 0) {
+ bit_posn++;
+ x >>= 1;
+ }
+ return bit_posn;
+}
+
+// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
+// and store the result in 'rlDest'.
+static bool handleEasyMultiply(CompilationUnit *cUnit,
+ RegLocation rlSrc, RegLocation rlDest, int lit)
+{
+ // Can we simplify this multiplication?
+ bool powerOfTwo = false;
+ bool popCountLE2 = false;
+ bool powerOfTwoMinusOne = false;
+ if (lit < 2) {
+ // Avoid special cases.
+ return false;
+ } else if (isPowerOfTwo(lit)) {
+ powerOfTwo = true;
+ } else if (isPopCountLE2(lit)) {
+ popCountLE2 = true;
+ } else if (isPowerOfTwo(lit + 1)) {
+ powerOfTwoMinusOne = true;
+ } else {
+ return false;
+ }
+ rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+ RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
+ if (powerOfTwo) {
+ // Shift.
+ opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
+ lowestSetBit(lit));
+ } else if (popCountLE2) {
+ // Shift and add and shift.
+ int firstBit = lowestSetBit(lit);
+ int secondBit = lowestSetBit(lit ^ (1 << firstBit));
+ genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
+ firstBit, secondBit);
+ } else {
+ // Reverse subtract: (src << (shift + 1)) - src.
+ assert(powerOfTwoMinusOne);
+ // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
+ int tReg = dvmCompilerAllocTemp(cUnit);
+ opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
+ opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
+ }
+ storeValue(cUnit, rlDest, rlResult);
+ return true;
}
static bool handleFmt22b_Fmt22s(CompilationUnit *cUnit, MIR *mir)
@@ -1896,15 +1951,10 @@
break;
case OP_MUL_INT_LIT8:
case OP_MUL_INT_LIT16: {
- // TUNING: General shift & add for small constants
- int shiftCount = mulToShift(lit);
- if (shiftCount >= 0) {
- op = kOpLsl;
- lit = shiftCount;
- shiftOp = true;
- } else {
- op = kOpMul;
+ if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
+ return false;
}
+ op = kOpMul;
break;
}
case OP_AND_INT_LIT8:
diff --git a/vm/compiler/codegen/arm/Thumb/Gen.c b/vm/compiler/codegen/arm/Thumb/Gen.c
index a274e19..35c4451 100644
--- a/vm/compiler/codegen/arm/Thumb/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb/Gen.c
@@ -289,3 +289,12 @@
dvmCompilerClobber(cUnit, ophi);
return false;
}
+
+static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
+ RegLocation rlSrc, RegLocation rlResult, int lit,
+ int firstBit, int secondBit)
+{
+ // We can't implement "add src, src, src, lsl#shift" on Thumb, so we have
+ // to do a regular multiply.
+ opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit);
+}
diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c
index 1390d64..dbce452 100644
--- a/vm/compiler/codegen/arm/Thumb2/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb2/Gen.c
@@ -442,3 +442,14 @@
storeValueWide(cUnit, rlDest, rlResult);
return false;
}
+
+static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
+ RegLocation rlSrc, RegLocation rlResult, int lit,
+ int firstBit, int secondBit)
+{
+ opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
+ encodeShift(kArmLsl, secondBit - firstBit));
+ if (firstBit != 0) {
+ opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
+ }
+}