Continuing evolution of Thumb2 support.

- Bug fix in local optimization: EncodingMap flag checks (CLOBBER_DEST,
  IS_BRANCH) are now applied only to real opcodes; pseudo opcodes have
  no EncodingMap entries.
- Enable partial floating point store sinking (with significant perf gain!)
- New Thumb2 encodings: modified immediates, 16-bit immediates (movw),
  12-bit load/store offsets, negative-offset load/store, cbz/cbnz, mov.w,
  vmov.f32/vmov.f64, plus double-precision register aliases dr0-dr15.
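
For background, a Thumb2 "modified immediate" packs certain 32-bit values
into 12 bits as i:imm3:a:bcdefgh -- four byte-replication patterns plus a
rotated 8-bit window whose top bit is implied. Below is a minimal
standalone sketch of the scheme implemented by modifiedImmediate()
(Thumb2Util.c) and expandImmediate() (ArchUtility.c) in this change;
the names encodeModImm/expandModImm are illustrative only:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Decode a 12-bit i:imm3:a:bcdefgh field back to its 32-bit value. */
    static uint32_t expandModImm(int field)
    {
        uint32_t bits = field & 0xff;
        switch ((field >> 8) & 0xf) {
        case 0: return bits;                       /* 000000ab */
        case 1: return (bits << 16) | bits;        /* 00ab00ab */
        case 2: return (bits << 24) | (bits << 8); /* ab00ab00 */
        case 3: return (bits << 24) | (bits << 16) |
                       (bits << 8) | bits;         /* abababab */
        default: break;
        }
        /* Rotated case: 1bcdefgh rotated right by i:imm3:a (8..31) */
        int rot = (field >> 7) & 0x1f;
        bits = (bits | 0x80) << 24;     /* place 1bcdefgh in the top byte */
        return bits >> (rot - 8);       /* equivalent to ror(1bcdefgh, rot) */
    }

    /* Encode value as a modified immediate; -1 if not representable. */
    static int encodeModImm(uint32_t value)
    {
        uint32_t b0 = value & 0xff;
        if (value <= 0xff)
            return (int) b0;
        if (value == ((b0 << 16) | b0))
            return (0x1 << 8) | (int) b0;
        if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
            return (0x3 << 8) | (int) b0;
        b0 = (value >> 8) & 0xff;
        if (value == ((b0 << 24) | (b0 << 8)))
            return (0x2 << 8) | (int) b0;
        /* Look for an 8-bit window, msb set, covering all active bits */
        for (int rot = 8; rot < 32; rot++) {
            uint32_t pat = (value << rot) | (value >> (32 - rot)); /* rol */
            if (pat >= 0x80 && pat <= 0xff)
                return (rot << 7) | (int) (pat & 0x7f);
        }
        return -1;
    }

    int main(void)
    {
        const uint32_t tests[] = { 0x0, 0xff, 0x00ff00ff, 0xff00ff00,
                                   0xffffffff, 0x3fc0, 0xab000000, 0x1234 };
        for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
            int field = encodeModImm(tests[i]);
            if (field < 0) {
                printf("0x%08x: no mod-imm (use movw or literal pool)\n",
                       (unsigned) tests[i]);
            } else {
                assert(expandModImm(field) == tests[i]);
                printf("0x%08x -> 0x%03x\n", (unsigned) tests[i], field);
            }
        }
        return 0;
    }

loadConstant() below tries the cheap Thumb mov/mvn forms first, then this
modified-immediate form, then a 16-bit movw, and only then falls back to
the per-translation literal pool.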
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index abcb2eb..60c5cdb 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -37,6 +37,30 @@
return buf;
}
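+/*
+ * Expand a 12-bit Thumb2 modified immediate (i:imm3:a:bcdefgh) back to
+ * the 32-bit value it represents, for disassembly listings.
+ */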
+static int expandImmediate(int value)
+{
+ int mode = (value & 0xf00) >> 8;
+ u4 bits = value & 0xff;
+ switch(mode) {
+ case 0:
+ return bits;
+ case 1:
+ return (bits << 16) | bits;
+ case 2:
+ return (bits << 24) | (bits << 8);
+ case 3:
+ return (bits << 24) | (bits << 16) | (bits << 8) | bits;
+ default:
+ break;
+ }
+ bits = (bits | 0x80) << 24;
+ return bits >> (((value & 0xf80) >> 7) - 8);
+}
+
/*
* Interpret a format string and build a string no longer than size
* See format key in Assemble.c.
@@ -62,6 +82,10 @@
assert((unsigned)(nc-'0') < 3);
operand = lir->operands[nc-'0'];
switch(*fmt++) {
+ case 'm':
+ operand = expandImmediate(operand);
+ sprintf(tbuf,"%d [0x%x]", operand, operand);
+ break;
case 's':
sprintf(tbuf,"s%d",operand & FP_REG_MASK);
break;
@@ -71,6 +95,7 @@
case 'h':
sprintf(tbuf,"%04x", operand);
break;
+ case 'M':
case 'd':
sprintf(tbuf,"%d", operand);
break;
@@ -106,6 +131,9 @@
case ARM_COND_CS:
strcpy(tbuf, "bcs");
break;
+ case ARM_COND_MI:
+ strcpy(tbuf, "bmi");
+ break;
default:
strcpy(tbuf, "");
break;
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index b43dab0..59c7529 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -54,13 +54,19 @@
/* Offset to distingish FP regs */
#define FP_REG_OFFSET 32
-/* Is reg fp? */
-#define IS_FP_REG(x) (x & FP_REG_OFFSET)
+/* Offset to distinguish DP FP regs */
+#define FP_DOUBLE 64
+/* Reg types */
+#define FPREG(x) (((x) & FP_REG_OFFSET) == FP_REG_OFFSET)
+#define LOWREG(x) (((x) & 0x7) == (x))
+#define DOUBLEREG(x) (((x) & FP_DOUBLE) == FP_DOUBLE)
+#define SINGLEREG(x) (FPREG(x) && !DOUBLEREG(x))
/* Mask to strip off fp flags */
#define FP_REG_MASK (FP_REG_OFFSET-1)
/* Mask to convert high reg to low for Thumb */
#define THUMB_REG_MASK 0x7
+
typedef enum NativeRegisterPool {
r0 = 0,
r1 = 1,
@@ -110,6 +116,22 @@
fr29 = 29 + FP_REG_OFFSET,
fr30 = 30 + FP_REG_OFFSET,
fr31 = 31 + FP_REG_OFFSET,
+ dr0 = fr0 + FP_DOUBLE,
+ dr1 = fr2 + FP_DOUBLE,
+ dr2 = fr4 + FP_DOUBLE,
+ dr3 = fr6 + FP_DOUBLE,
+ dr4 = fr8 + FP_DOUBLE,
+ dr5 = fr10 + FP_DOUBLE,
+ dr6 = fr12 + FP_DOUBLE,
+ dr7 = fr14 + FP_DOUBLE,
+ dr8 = fr16 + FP_DOUBLE,
+ dr9 = fr18 + FP_DOUBLE,
+ dr10 = fr20 + FP_DOUBLE,
+ dr11 = fr22 + FP_DOUBLE,
+ dr12 = fr24 + FP_DOUBLE,
+ dr13 = fr26 + FP_DOUBLE,
+ dr14 = fr28 + FP_DOUBLE,
+ dr15 = fr30 + FP_DOUBLE,
} NativeRegisterPool;
/* Thumb condition encodings */
@@ -217,7 +239,6 @@
THUMB_SUB_SPI7, /* sub(4) [101100001] imm_7[6..0] */
THUMB_SWI, /* swi [11011111] imm_8[7..0] */
THUMB_TST, /* tst [0100001000] rm[5..3] rn[2..0] */
-// FIXME: Enhance assembly encoding. Only low fp regs supported here
THUMB2_VLDRS, /* vldr low sx [111011011001] rn[19..16] rd[15-12]
[1010] imm_8[7..0] */
THUMB2_VLDRD, /* vldr low dx [111011011001] rn[19..16] rd[15-12]
@@ -258,6 +279,30 @@
[10101100] vm[3..0] */
THUMB2_VSQRTD, /* vsqrt.f64 vd, vm [1110111010110001] vd[15..12]
[10111100] vm[3..0] */
+ THUMB2_MOV_IMM_SHIFT, /* mov(T2) rd, #<const> [11110] i [00001001111]
+ imm3 rd[11..8] imm8 */
+ THUMB2_MOV_IMM16, /* mov(T3) rd, #<const> [11110] i [100100] imm4 [0]
+ imm3 rd[11..8] imm8 */
+ THUMB2_STR_RRI12, /* str(Imm,T3) rd,[rn,#imm12] [111110001100]
+ rn[19..16] rt[15..12] imm12[11..0] */
+ THUMB2_LDR_RRI12, /* ldr(Imm,T3) rd,[rn,#imm12] [111110001101]
+ rn[19..16] rt[15..12] imm12[11..0] */
+ THUMB2_STR_RRI8_PREDEC, /* str(Imm,T4) rd,[rn,#-imm8] [111110000100]
+ rn[19..16] rt[15..12] [1100] imm[7..0]*/
+ THUMB2_LDR_RRI8_PREDEC, /* ldr(Imm,T4) rd,[rn,#-imm8] [111110000101]
+ rn[19..16] rt[15..12] [1100] imm[7..0]*/
+ THUMB2_CBNZ, /* cbnz rd,<label> [101110] i [1] imm5[7..3]
+ rn[2..0] */
+ THUMB2_CBZ, /* cbz rd,<label> [101100] i [1] imm5[7..3]
+ rn[2..0] */
+ THUMB2_ADD_RRI12, /* add rd, rn, #imm12 [11110] i [100000] rn[19..16]
+ [0] imm3[14..12] rd[11..8] imm8[7..0] */
+ THUMB2_MOV_RR, /* mov rd, rm [11101010010011110000] rd[11..8]
+ [0000] rm[3..0] */
+ THUMB2_VMOVS, /* vmov.f32 vd, vm [111011101] D [110000]
+ vd[15..12] [101001] M [0] vm[3..0] */
+ THUMB2_VMOVD, /* vmov.f64 vd, vm [111011101] D [110000]
+ vd[15..12] [101101] M [0] vm[3..0] */
ARM_LAST,
} ArmOpCode;
@@ -278,8 +323,10 @@
BITBLT, /* Bit string using end/start */
DFP, /* Double FP reg */
SFP, /* Single FP reg */
- IMMSHIFT8, /* Shifted 8-bit immed field using [26,14..12,7..0] */
- IMM12, /* Zero-extended 12-bit immediate using [26,14..12,7..0] */
+ MODIMM, /* Shifted 8-bit immediate using [26,14..12,7..0] */
+ IMM16, /* Zero-extended immediate using [26,19..16,14..12,7..0] */
+ IMM6, /* Encoded branch target using [9,7..3]0 */
+ IMM12, /* Zero-extended immediate using [26,14..12,7..0] */
} ArmEncodingKind;
/* Struct used to define the snippet positions for each Thumb opcode */
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index fb85253..ea133e7 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -65,6 +65,8 @@
* R -> register list
* s -> single precision floating point register
* S -> double precision floating point register
+ * m -> Thumb2 modified immediate
+ * M -> Thumb2 16-bit zero-extended immediate
*
* [!] escape. To insert "!", use "!!"
*/
@@ -365,19 +367,19 @@
"tst", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB2_VLDRS, 0xed900a00,
SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vldr", "!0s, [r!1d, #!2E]", 2),
ENCODING_MAP(THUMB2_VLDRD, 0xed900b00,
DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vldr", "!0S, [r!1d, #!2E]", 2),
ENCODING_MAP(THUMB2_VMULS, 0xee200a00,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vmuls", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VMULD, 0xee200b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vmuld", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VSTRS, 0xed800a00,
SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
@@ -389,60 +391,108 @@
"vstr", "!0S, [r!1d, #!2E]", 2),
ENCODING_MAP(THUMB2_VSUBS, 0xee300a40,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vsub", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VSUBD, 0xee300b40,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vsub", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VADDS, 0xee300a00,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vadd", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VADDD, 0xee300b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vadd", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VDIVS, 0xee800a00,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vdivs", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VDIVD, 0xee800b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vdivs", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VCVTIF, 0xeeb80ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f32", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_VCVTID, 0xeeb80bc0,
DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f64", "!0S, !1s", 2),
ENCODING_MAP(THUMB2_VCVTFI, 0xeebd0ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.s32.f32 ", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_VCVTDI, 0xeebd0bc0,
SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.s32.f64 ", "!0s, !1S", 2),
ENCODING_MAP(THUMB2_VCVTFD, 0xeeb70ac0,
DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f64.f32 ", "!0S, !1s", 2),
ENCODING_MAP(THUMB2_VCVTDF, 0xeeb70bc0,
SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f32.f64 ", "!0s, !1S", 2),
ENCODING_MAP(THUMB2_VSQRTS, 0xeeb10ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vsqrt.f32 ", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_VSQRTD, 0xeeb10bc0,
DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vsqrt.f64 ", "!0S, !1S", 2),
+ ENCODING_MAP(THUMB2_MOV_IMM_SHIFT, 0xf04f0000,
+ BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, #!1m", 2),
+ ENCODING_MAP(THUMB2_MOV_IMM16, 0xf2400000,
+ BITBLT, 11, 8, IMM16, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, #!1M", 2),
+ ENCODING_MAP(THUMB2_STR_RRI12, 0xf8c00000,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0,
+ IS_TERTIARY_OP,
+ "str", "r!0d,[r!1d, #!2d", 2),
+ ENCODING_MAP(THUMB2_LDR_RRI12, 0xf8d00000,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "ldr", "r!0d,[r!1d, #!2d", 2),
+ ENCODING_MAP(THUMB2_STR_RRI8_PREDEC, 0xf8400c00,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
+ IS_TERTIARY_OP,
+ "str", "r!0d,[r!1d, #-!2d]", 2),
+ ENCODING_MAP(THUMB2_LDR_RRI8_PREDEC, 0xf8500c00,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "ldr", "r!0d,[r!1d, #-!2d]", 2),
+ ENCODING_MAP(THUMB2_CBNZ, 0xb900,
+ BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "cbnz", "r!0d,!1t", 1),
+ ENCODING_MAP(THUMB2_CBZ, 0xb100,
+ BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "cbz", "r!0d,!1t", 1),
+ ENCODING_MAP(THUMB2_ADD_RRI12, 0xf2000000,
+ BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "add", "r!0d,r!1d,#!2d", 2),
+ ENCODING_MAP(THUMB2_MOV_RR, 0xea4f0000,
+ BITBLT, 11, 8, BITBLT, 3, 0, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, r!1d", 2),
+ ENCODING_MAP(THUMB2_VMOVS, 0xeeb00a40,
+ SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "vmov.f32 ", "!0s, !1s", 2),
+ ENCODING_MAP(THUMB2_VMOVD, 0xeeb00b40,
+ DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "vmov.f64 ", "!0s, !1s", 2),
};
#define PADDING_MOV_R0_R0 0x1C00
@@ -508,6 +558,15 @@
return true;
}
lir->operands[1] = delta >> 2;
+ } else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) {
+ ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+ intptr_t pc = lir->generic.offset + 4;
+ intptr_t target = targetLIR->generic.offset;
+ int delta = target - pc;
+ if (delta > 126 || delta < 0) {
+ return true;
+ }
+ lir->operands[1] = delta >> 1;
} else if (lir->opCode == THUMB_B_COND) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
@@ -552,6 +611,11 @@
switch(encoder->fieldLoc[i].kind) {
case UNUSED:
break;
+ case IMM6:
+ value = ((lir->operands[i] & 0x20) >> 5) << 9;
+ value |= (lir->operands[i] & 0x1f) << 3;
+ bits |= value;
+ break;
case BITBLT:
value = (lir->operands[i] << encoder->fieldLoc[i].start) &
((1 << (encoder->fieldLoc[i].end + 1)) - 1);
@@ -575,11 +639,19 @@
encoder->fieldLoc[i].start;
bits |= value;
break;
- case IMMSHIFT8:
case IMM12:
+ case MODIMM:
value = ((lir->operands[i] & 0x800) >> 11) << 26;
value |= ((lir->operands[i] & 0x700) >> 8) << 12;
value |= lir->operands[i] & 0x0ff;
+ bits |= value;
+ break;
+ case IMM16:
+ value = ((lir->operands[i] & 0x0800) >> 11) << 26;
+ value |= ((lir->operands[i] & 0xf000) >> 12) << 16;
+ value |= ((lir->operands[i] & 0x0700) >> 8) << 12;
+ value |= lir->operands[i] & 0x0ff;
+ bits |= value;
break;
default:
assert(0);
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index 5f43b87..11aaedd 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -18,6 +18,28 @@
#include "vm/compiler/CompilerInternals.h"
#include "ArmLIR.h"
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+
+/* Is this a load of a Dalvik register from the frame (rFP-based)? */
+static inline bool isDalvikLoad(ArmLIR *lir)
+{
+ return ((lir->operands[1] == rFP) &&
+ ((lir->opCode == THUMB_LDR_RRI5) ||
+ (lir->opCode == THUMB2_LDR_RRI12) ||
+ (lir->opCode == THUMB2_VLDRS) ||
+ (lir->opCode == THUMB2_VLDRD)));
+}
+
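+/* Is this a store of a Dalvik register to the frame (rFP-based)? */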
+static inline bool isDalvikStore(ArmLIR *lir)
+{
+ return ((lir->operands[1] == rFP) &&
+ ((lir->opCode == THUMB_STR_RRI5) ||
+ (lir->opCode == THUMB2_STR_RRI12) ||
+ (lir->opCode == THUMB2_VSTRS) ||
+ (lir->opCode == THUMB2_VSTRD)));
+}
+
/*
* Perform a pass of top-down walk to
* 1) Eliminate redundant loads and stores
@@ -37,8 +58,7 @@
if (thisLIR->age >= cUnit->optRound) {
continue;
}
- if (thisLIR->opCode == THUMB_STR_RRI5 &&
- thisLIR->operands[1] == rFP) {
+ if (isDalvikStore(thisLIR)) {
int dRegId = thisLIR->operands[2];
int nativeRegId = thisLIR->operands[0];
ArmLIR *checkLIR;
@@ -49,16 +69,17 @@
checkLIR = NEXT_LIR(checkLIR)) {
/* Check if a Dalvik register load is redundant */
- if (checkLIR->opCode == THUMB_LDR_RRI5 &&
- checkLIR->operands[1] == rFP &&
- checkLIR->operands[2] == dRegId) {
+ if (isDalvikLoad(checkLIR) &&
+ checkLIR->operands[2] == dRegId) {
+ if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) {
+ break; // TODO: handle gen<=>float copies
+ }
/* Insert a move to replace the load */
if (checkLIR->operands[0] != nativeRegId) {
- ArmLIR *moveLIR =
- dvmCompilerNew(sizeof(ArmLIR), true);
- moveLIR->opCode = THUMB_MOV_RR;
- moveLIR->operands[0] = checkLIR->operands[0];
- moveLIR->operands[1] = nativeRegId;
+ ArmLIR *moveLIR;
+ moveLIR = dvmCompilerRegCopy(cUnit,
+ checkLIR->operands[0],
+ nativeRegId);
/*
* Insertion is guaranteed to succeed since checkLIR
* is never the first LIR on the list
@@ -70,8 +91,7 @@
continue;
/* Found a true output dependency - nuke the previous store */
- } else if (checkLIR->opCode == THUMB_STR_RRI5 &&
- checkLIR->operands[1] == rFP &&
+ } else if (isDalvikStore(checkLIR) &&
checkLIR->operands[2] == dRegId) {
thisLIR->isNop = true;
break;
@@ -82,10 +102,6 @@
/* Last instruction reached */
stopHere |= checkLIR->generic.next == NULL;
- /* Store data is clobbered */
- stopHere |= (EncodingMap[checkLIR->opCode].flags &
- CLOBBER_DEST) != 0 &&
- checkLIR->operands[0] == nativeRegId;
/*
* Conservatively assume there is a memory dependency
* for st/ld multiples and reg+reg address mode
@@ -93,16 +109,20 @@
stopHere |= checkLIR->opCode == THUMB_STMIA ||
checkLIR->opCode == THUMB_LDMIA ||
checkLIR->opCode == THUMB_STR_RRR ||
- checkLIR->opCode == THUMB_LDR_RRR;
+ checkLIR->opCode == THUMB_LDR_RRR ||
+ checkLIR->opCode == THUMB2_VLDRD ||
+ checkLIR->opCode == THUMB2_VSTRD;
-// FIXME: need to enhance this code to sink & play well with coprocessor ld/str
- stopHere |= checkLIR->opCode == THUMB2_VSTRS ||
- checkLIR->opCode == THUMB2_VSTRD ||
- checkLIR->opCode == THUMB2_VLDRS ||
- checkLIR->opCode == THUMB2_VLDRD;
+ if (!isPseudoOpCode(checkLIR->opCode)) {
- stopHere |= (EncodingMap[checkLIR->opCode].flags &
- IS_BRANCH) != 0;
+ /* Store data is clobbered */
+ stopHere |= (EncodingMap[checkLIR->opCode].flags &
+ CLOBBER_DEST) != 0 &&
+ checkLIR->operands[0] == nativeRegId;
+
+ stopHere |= (EncodingMap[checkLIR->opCode].flags &
+ IS_BRANCH) != 0;
+ }
/* Found a new place to put the store - move it here */
if (stopHere == true) {
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index 1dd009b..3a9f1de 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -45,6 +45,7 @@
ArmConditionCode cond, int reg,
int checkValue, int dOffset,
ArmLIR *pcrLabel);
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
/*****************************************************************************/
@@ -132,14 +133,101 @@
/*****************************************************************************/
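+/*
+ * Emit a register-to-register move, selecting a Thumb, Thumb2, or VFP
+ * encoding based on the register classes of rDest and rSrc.
+ */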
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
+{
+ ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true);
+ res->operands[0] = rDest;
+ res->operands[1] = rSrc;
+ if (rDest == rSrc) {
+ res->isNop = true;
+ } else {
+ if (LOWREG(rDest) && LOWREG(rSrc)) {
+ res->opCode = THUMB_MOV_RR;
+ } else if (FPREG(rDest) && FPREG(rSrc)) {
+ if (DOUBLEREG(rDest)) {
+ assert(DOUBLEREG(rSrc));
+ res->opCode = THUMB2_VMOVD;
+ } else {
+ assert(SINGLEREG(rSrc));
+ res->opCode = THUMB2_VMOVS;
+ }
+ } else {
+ // TODO: support copy between FP and gen regs.
+ assert(!FPREG(rDest));
+ assert(!FPREG(rSrc));
+ res->opCode = THUMB2_MOV_RR;
+ }
+ }
+ return res;
+}
+
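+/* Count leading zeros, by binary search (cf. Hacker's Delight) */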
+static int leadingZeros(u4 val)
+{
+ u4 alt;
+ int n;
+ int count;
+
+ count = 16;
+ n = 32;
+ do {
+ alt = val >> count;
+ if (alt != 0) {
+ n = n - count;
+ val = alt;
+ }
+ count >>= 1;
+ } while (count);
+ return n - val;
+}
+
+/*
+ * Determine whether value can be encoded as a Thumb modified
+ * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form.
+ */
+static int modifiedImmediate(u4 value)
+{
+ int zLeading;
+ int zTrailing;
+ u4 b0 = value & 0xff;
+
+ /* Note: case of value==0 must use 0:000:0:0000000 encoding */
+ if (value <= 0xFF)
+ return b0; // 0:000:a:bcdefgh
+ if (value == ((b0 << 16) | b0))
+ return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
+ if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
+ return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
+ b0 = (value >> 8) & 0xff;
+ if (value == ((b0 << 24) | (b0 << 8)))
+ return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
+ /* Can we do it with rotation? */
+ zLeading = leadingZeros(value);
+ zTrailing = 32 - leadingZeros(~value & (value - 1));
+ /* A run of eight or fewer active bits? */
+ if ((zLeading + zTrailing) < 24)
+ return -1; /* No - bail */
+ /* left-justify the constant, discarding msb (known to be 1) */
+ value <<= zLeading + 1;
+ /* Create bcdefgh */
+ value >>= 25;
+ /* Put it all together */
+ return value | ((0x8 + zLeading) << 7); /* [01000..11111]:bcdefgh */
+}
+
/*
* Load a immediate using a shortcut if possible; otherwise
* grab from the per-translation literal pool
*/
static void loadConstant(CompilationUnit *cUnit, int rDest, int value)
{
+ int modImm;
/* See if the value can be constructed cheaply */
- if ((value >= 0) && (value <= 255)) {
+ if ((value & 0xff) == value) {
newLIR2(cUnit, THUMB_MOV_IMM, rDest, value);
return;
} else if ((value & 0xFFFFFF00) == 0xFFFFFF00) {
@@ -147,6 +230,17 @@
newLIR2(cUnit, THUMB_MVN, rDest, rDest);
return;
}
+ /* Check Modified immediate special cases */
+ modImm = modifiedImmediate(value);
+ if (modImm >= 0) {
+ newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, rDest, modImm);
+ return;
+ }
+ /* 16-bit immediate? */
+ if ((value & 0xffff) == value) {
+ newLIR2(cUnit, THUMB2_MOV_IMM16, rDest, value);
+ return;
+ }
/* No shortcut - go ahead and use literal pool */
ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255);
if (dataTarget == NULL) {
@@ -172,9 +266,8 @@
{
int offset = offsetof(StackSaveArea, xtra.currentPc);
loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
- newLIR2(cUnit, THUMB_MOV_RR, rAddr, rFP);
- newLIR2(cUnit, THUMB_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset);
- newLIR3(cUnit, THUMB_STR_RRI5, rDPC, rAddr, 0);
+ newLIR3(cUnit, THUMB2_STR_RRI8_PREDEC, rDPC, rFP,
+ sizeof(StackSaveArea) - offset);
}
/* Generate conditional branch instructions */
@@ -201,22 +294,20 @@
static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
int rDestHi)
{
+ bool allLowRegs = (LOWREG(rDestLo) && LOWREG(rDestHi));
+
/* Use reg + imm5*4 to load the values if possible */
- if (vSrc <= 30) {
+ if (allLowRegs && vSrc <= 30) {
newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc);
newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1);
} else {
- if (vSrc <= 64) {
- /* Sneak 4 into the base address first */
- newLIR3(cUnit, THUMB_ADD_RRI3, rDestLo, rFP, 4);
- newLIR2(cUnit, THUMB_ADD_RI8, rDestLo, (vSrc-1)*4);
- } else {
- /* Offset too far from rFP */
- loadConstant(cUnit, rDestLo, vSrc*4);
- newLIR3(cUnit, THUMB_ADD_RRR, rDestLo, rFP, rDestLo);
- }
assert(rDestLo < rDestHi);
- newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi)));
+ loadValueAddress(cUnit, vSrc, rDestLo);
+ if (allLowRegs) {
+ newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi)));
+ } else {
+ assert(0); // Unimp - need Thumb2 ldmia
+ }
}
}
@@ -227,49 +318,75 @@
static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
int vDest, int rScratch)
{
+ bool allLowRegs = (LOWREG(rSrcLo) && LOWREG(rSrcHi));
killNullCheckedRegister(cUnit, vDest);
killNullCheckedRegister(cUnit, vDest+1);
updateLiveRegisterPair(cUnit, vDest, rSrcLo, rSrcHi);
/* Use reg + imm5*4 to store the values if possible */
- if (vDest <= 30) {
+ if (allLowRegs && vDest <= 30) {
newLIR3(cUnit, THUMB_STR_RRI5, rSrcLo, rFP, vDest);
newLIR3(cUnit, THUMB_STR_RRI5, rSrcHi, rFP, vDest+1);
} else {
- if (vDest <= 64) {
- /* Sneak 4 into the base address first */
- newLIR3(cUnit, THUMB_ADD_RRI3, rScratch, rFP, 4);
- newLIR2(cUnit, THUMB_ADD_RI8, rScratch, (vDest-1)*4);
- } else {
- /* Offset too far from rFP */
- loadConstant(cUnit, rScratch, vDest*4);
- newLIR3(cUnit, THUMB_ADD_RRR, rScratch, rFP, rScratch);
- }
assert(rSrcLo < rSrcHi);
- newLIR2(cUnit, THUMB_STMIA, rScratch, (1<<rSrcLo) | (1 << (rSrcHi)));
+ loadValueAddress(cUnit, vDest, rScratch);
+ if (allLowRegs) {
+ newLIR2(cUnit, THUMB_STMIA, rScratch,
+ (1<<rSrcLo) | (1 << (rSrcHi)));
+ } else {
+ assert(0); // Unimp - need Thumb2 stmia
+ }
+ }
+}
+
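+/* Add rSrc1 and rSrc2 and store the result in rDest */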
+static void addRegisterRegister(CompilationUnit *cUnit, int rDest,
+ int rSrc1, int rSrc2)
+{
+ if (!LOWREG(rDest) || !LOWREG(rSrc1) || !LOWREG(rSrc2)) {
+ assert(0); // Unimp
+ //newLIR3(cUnit, THUMB2_ADD_RRR, rDest, rSrc1, rSrc2);
+ } else {
+ newLIR3(cUnit, THUMB_ADD_RRR, rDest, rSrc1, rSrc2);
+ }
+}
+
+/* Add in immediate to a register. */
+static void addRegisterImmediate(CompilationUnit *cUnit, int rDest, int rSrc,
+ int value)
+{
+// TODO: check for modified immediate form
+ if (LOWREG(rDest) && LOWREG(rSrc) && (value <= 7)) {
+ newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rSrc, value);
+ } else if (LOWREG(rDest) && (rDest == rSrc) && ((value & 0xff) == value)) {
+ newLIR2(cUnit, THUMB_ADD_RI8, rDest, value);
+ } else if (value <= 4095) {
+ newLIR3(cUnit, THUMB2_ADD_RRI12, rDest, rSrc, value);
+ } else {
+ loadConstant(cUnit, rDest, value);
+ addRegisterRegister(cUnit, rDest, rDest, rSrc);
}
}
/* Load the address of a Dalvik register on the frame */
static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest)
{
- /* RRI3 can add up to 7 */
- if (vSrc <= 1) {
- newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rFP, vSrc*4);
- } else if (vSrc <= 64) {
- /* Sneak 4 into the base address first */
- newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rFP, 4);
- newLIR2(cUnit, THUMB_ADD_RI8, rDest, (vSrc-1)*4);
- } else {
- loadConstant(cUnit, rDest, vSrc*4);
- newLIR3(cUnit, THUMB_ADD_RRR, rDest, rFP, rDest);
- }
+ addRegisterImmediate(cUnit, rDest, rFP, vSrc*4);
}
+/*
+ * FIXME: We need a general register temp for all of these coprocessor
+ * operations in case we can't reach in 1 shot. Might just want to
+ * designate a hot temp that all codegen routines could use in their
+ * scope. Alternately, callers will need to allocate a temp and
+ * pass it in to each of these.
+ */
+
/* Load a float from a Dalvik register */
static void loadFloat(CompilationUnit *cUnit, int vSrc, int rDest)
{
assert(vSrc <= 255); // FIXME - temp limit to 1st 256
+ assert(SINGLEREG(rDest));
newLIR3(cUnit, THUMB2_VLDRS, rDest, rFP, vSrc);
}
@@ -278,6 +394,7 @@
int rScratch)
{
assert(vSrc <= 255); // FIXME - temp limit to 1st 256
+ assert(SINGLEREG(rSrc));
newLIR3(cUnit, THUMB2_VSTRS, rSrc, rFP, vDest);
}
@@ -285,6 +402,7 @@
static void loadDouble(CompilationUnit *cUnit, int vSrc, int rDest)
{
assert(vSrc <= 255); // FIXME - temp limit to 1st 256
+ assert(DOUBLEREG(rDest));
newLIR3(cUnit, THUMB2_VLDRD, rDest, rFP, vSrc);
}
@@ -293,6 +411,7 @@
int rScratch)
{
assert(vSrc <= 255); // FIXME - temp limit to 1st 256
+ assert(DOUBLEREG(rSrc));
newLIR3(cUnit, THUMB2_VSTRD, rSrc, rFP, vDest);
}
@@ -300,26 +419,27 @@
/* Load a single value from rFP[src] and store them into rDest */
static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest)
{
- /* Use reg + imm5*4 to load the value if possible */
- if (vSrc <= 31) {
- newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rFP, vSrc);
- } else {
- loadConstant(cUnit, rDest, vSrc*4);
- newLIR3(cUnit, THUMB_LDR_RRR, rDest, rFP, rDest);
- }
+ loadWordDisp(cUnit, rFP, vSrc * 4, rDest);
}
/* Load a word at base + displacement. Displacement must be word multiple */
static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement,
int rDest)
{
+ bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest));
assert((displacement & 0x3) == 0);
/* Can it fit in a RRI5? */
- if (displacement < 128) {
+ if (allLowRegs && displacement < 128) {
newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rBase, displacement >> 2);
+ } else if (displacement < 4092) {
+ newLIR3(cUnit, THUMB2_LDR_RRI12, rDest, rBase, displacement);
} else {
loadConstant(cUnit, rDest, displacement);
- newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest);
+ if (allLowRegs) {
+ newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest);
+ } else {
+ assert(0); // Unimp - need Thumb2 ldr_rrr
+ }
}
}
@@ -331,11 +451,17 @@
updateLiveRegister(cUnit, vDest, rSrc);
/* Use reg + imm5*4 to store the value if possible */
- if (vDest <= 31) {
+ if (LOWREG(rSrc) && vDest <= 31) {
newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest);
+ } else if (vDest <= 1023) {
+ newLIR3(cUnit, THUMB2_STR_RRI12, rSrc, rFP, vDest*4);
} else {
loadConstant(cUnit, rScratch, vDest*4);
- newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch);
+ if (LOWREG(rSrc)) {
+ newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch);
+ } else {
+ assert(0); // Unimp: Need generic str_rrr routine
+ }
}
}
@@ -343,12 +469,20 @@
* Perform a "reg cmp imm" operation and jump to the PCR region if condition
* satisfies.
*/
-static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit,
+static ArmLIR *genRegImmCheck(CompilationUnit *cUnit,
ArmConditionCode cond, int reg,
int checkValue, int dOffset,
ArmLIR *pcrLabel)
{
- newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue);
- ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
+ ArmLIR *branch;
+ if ((LOWREG(reg)) && (checkValue == 0) &&
+ ((cond == ARM_COND_EQ) || (cond == ARM_COND_NE))) {
+ branch = newLIR2(cUnit,
+ (cond == ARM_COND_EQ) ? THUMB2_CBZ : THUMB2_CBNZ,
+ reg, 0);
+ } else {
+ newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue);
+ branch = newLIR2(cUnit, THUMB_B_COND, 0, cond);
+ }
return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
}
diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c
index 69bb0f7..8be50ad 100644
--- a/vm/compiler/codegen/arm/ThumbUtil.c
+++ b/vm/compiler/codegen/arm/ThumbUtil.c
@@ -45,6 +45,7 @@
ArmConditionCode cond, int reg,
int checkValue, int dOffset,
ArmLIR *pcrLabel);
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
/*****************************************************************************/
@@ -132,6 +133,19 @@
/*****************************************************************************/
+ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
+{
+ ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true);
+ assert(LOWREG(rDest) && LOWREG(rSrc));
+ res->operands[0] = rDest;
+ res->operands[1] = rSrc;
+ res->opCode = THUMB_MOV_RR;
+ if (rDest == rSrc) {
+ res->isNop = true;
+ }
+ return res;
+}
+
/*
* Load a immediate using a shortcut if possible; otherwise
* grab from the per-translation literal pool
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 794d754..92097af 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -119,11 +119,11 @@
{
int offset = offsetof(InterpState, retval);
int vSrc = mir->dalvikInsn.vA;
- loadDouble(cUnit, vSrc, fr2);
- newLIR2(cUnit, THUMB2_VSQRTD, fr0, fr2);
+ loadDouble(cUnit, vSrc, dr1);
+ newLIR2(cUnit, THUMB2_VSQRTD, dr0, dr1);
assert(offset & 0x3 == 0); /* Must be word aligned */
assert(offset < 1024);
- newLIR3(cUnit, THUMB2_VSTRD, fr0, rGLUE, offset >> 2);
+ newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
return true;
}
@@ -212,10 +212,10 @@
default:
return true;
}
- loadDouble(cUnit, vSrc1, fr2);
- loadDouble(cUnit, vSrc2, fr4);
- newLIR3(cUnit, op, fr0, fr2, fr4);
- storeDouble(cUnit, fr0, vDest, 0);
+ loadDouble(cUnit, vSrc1, dr1);
+ loadDouble(cUnit, vSrc2, dr2);
+ newLIR3(cUnit, op, dr0, dr1, dr2);
+ storeDouble(cUnit, dr0, vDest, 0);
return false;
}
@@ -227,6 +227,8 @@
int op = THUMB_BKPT;
bool longSrc = false;
bool longDest = false;
+ int srcReg;
+ int tgtReg;
switch (opCode) {
case OP_INT_TO_FLOAT:
@@ -267,15 +269,20 @@
default:
return true;
}
- if (longSrc)
- loadDouble(cUnit, vSrc2, fr2);
- else
- loadFloat(cUnit, vSrc2, fr2);
- newLIR2(cUnit, op, fr0, fr2);
- if (longDest)
- storeDouble(cUnit, fr0, vSrc1Dest, 0);
- else
+ if (longSrc) {
+ srcReg = dr1;
+ loadDouble(cUnit, vSrc2, srcReg);
+ } else {
+ srcReg = fr2;
+ loadFloat(cUnit, vSrc2, srcReg);
+ }
+ if (longDest) {
+ newLIR2(cUnit, op, dr0, srcReg);
+ storeDouble(cUnit, dr0, vSrc1Dest, 0);
+ } else {
+ newLIR2(cUnit, op, fr0, srcReg);
storeFloat(cUnit, fr0, vSrc1Dest, 0);
+ }
return false;
}