Implement chaining for up to the first 64 cases of a switch statement.
diff --git a/vm/Globals.h b/vm/Globals.h
index 5dd056f..b4d1f17 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -654,6 +654,18 @@
 #if defined(WITH_JIT)
 
 /*
+ * Exiting the compiled code w/o chaining will incur overhead to look up the
+ * target in the code cache which is extra work only when JIT is enabled. So
+ * we want to monitor it closely to make sure we don't have performance bugs.
+ */
+typedef enum NoChainExits {     /* mirrored in mterp/common/asm-constants.h */
+    kInlineCacheMiss = 0,       /* exit from TEMPLATE_INVOKE_METHOD_NO_OPT */
+    kCallsiteInterpreted,       /* TEMPLATE_RETURN to an interpreted callsite */
+    kSwitchOverflow,            /* switch case beyond the chained cells */
+    kNoChainExitLast,           /* number of reasons; sizes noChainExit[] */
+} NoChainExits;
+
+/*
  * JIT-specific global state
  */
 struct DvmJitGlobals {
@@ -711,7 +723,7 @@
     int                compilerMaxQueued;
     int                addrLookupsFound;
     int                addrLookupsNotFound;
-    int                noChainExit;
+    int                noChainExit[kNoChainExitLast];
     int                normalExit;
     int                puntExit;
     int                translationChains;
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 35611a3..24e9b37 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -171,6 +171,12 @@
     /* Map SSA names to location */
     RegLocation *regLocation;
     int sequenceNumber;
+
+    /*
+     * Set to the Dalvik PC of the switch instruction if it has more than
+     * MAX_CHAINED_SWITCH_CASES cases.
+     */
+    const u2 *switchOverflowPad;
 } CompilationUnit;
 
 BasicBlock *dvmCompilerNewBB(BBType blockType);
diff --git a/vm/compiler/CompilerUtility.h b/vm/compiler/CompilerUtility.h
index d5a9d84..1cfc56b 100644
--- a/vm/compiler/CompilerUtility.h
+++ b/vm/compiler/CompilerUtility.h
@@ -17,7 +17,8 @@
 #ifndef _DALVIK_VM_COMPILER_UTILITY
 #define _DALVIK_VM_COMPILER_UTILITY
 
-#define ARENA_DEFAULT_SIZE 4096
+/* Each arena page has some overhead, so take a few bytes off 8k */
+#define ARENA_DEFAULT_SIZE 8100
 
 /* Allocate the initial memory block for arena-based allocation */
 bool dvmCompilerHeapInit(void);
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index 28d4d9f..1cd821f 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -506,8 +506,48 @@
             cUnit.hasLoop = true;
         }
 
+        if (lastInsn->dalvikInsn.opCode == OP_PACKED_SWITCH ||
+            lastInsn->dalvikInsn.opCode == OP_SPARSE_SWITCH) {
+            int i;
+            const u2 *switchData = desc->method->insns + lastInsn->offset +
+                             lastInsn->dalvikInsn.vB;
+            int size = switchData[1];
+            int maxChains = MIN(size, MAX_CHAINED_SWITCH_CASES);
+
+            /*
+             * Record the switch's Dalvik PC so that codegen emits a landing
+             * pad for cases beyond the first MAX_CHAINED_SWITCH_CASES. Those
+             * cases re-enter the interpreter through the NoChain point.
+             */
+            if (maxChains != size) {
+                cUnit.switchOverflowPad =
+                    desc->method->insns + lastInsn->offset;
+            }
+
+            s4 *targets = (s4 *) (switchData + 2 +
+                    (lastInsn->dalvikInsn.opCode == OP_PACKED_SWITCH ?
+                     2 : size * 2));
+
+            /* One chaining cell for the first MAX_CHAINED_SWITCH_CASES cases */
+            for (i = 0; i < maxChains; i++) {
+                BasicBlock *caseChain = dvmCompilerNewBB(kChainingCellNormal);
+                lastBB->next = caseChain;
+                lastBB = caseChain;
+
+                caseChain->startOffset = lastInsn->offset + targets[i];
+                caseChain->id = numBlocks++;
+            }
+
+            /* One more chaining cell for the default case */
+            BasicBlock *caseChain = dvmCompilerNewBB(kChainingCellNormal);
+            lastBB->next = caseChain;
+            lastBB = caseChain;
+
+            caseChain->startOffset = lastInsn->offset + lastInsn->width;
+            caseChain->id = numBlocks++;
         /* Fallthrough block not included in the trace */
-        if (!isUnconditionalBranch(lastInsn) && curBB->fallThrough == NULL) {
+        } else if (!isUnconditionalBranch(lastInsn) &&
+                   curBB->fallThrough == NULL) {
             /*
              * If the chaining cell is after an invoke or
              * instruction that cannot change the control flow, request a hot
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
index cbfea16..77a2397 100644
--- a/vm/compiler/Utility.c
+++ b/vm/compiler/Utility.c
@@ -69,7 +69,7 @@
         if (size > ARENA_DEFAULT_SIZE) {
             LOGE("Requesting %d bytes which exceed the maximal size allowed\n",
                  size);
-            return NULL;
+            dvmAbort();
         }
         /* Time to allocate a new arena */
         ArenaMemBlock *newArena = (ArenaMemBlock *)
diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h
index 8f138a6..75307a7 100644
--- a/vm/compiler/codegen/CompilerCodegen.h
+++ b/vm/compiler/codegen/CompilerCodegen.h
@@ -19,6 +19,9 @@
 
 #include "compiler/CompilerIR.h"
 
+/* Maximal number of switch cases to have inline chains */
+#define MAX_CHAINED_SWITCH_CASES 64
+
 /* Work unit is architecture dependent */
 bool dvmCompilerDoWork(CompilerWorkOrder *work);
 
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index 59933b1..b37efd2 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -3285,6 +3285,135 @@
     return false;
 }
 
+/*
+ * Find the matching case of a packed-switch: switchData is the switch
+ * payload, testVal the key, and pc the value read by "mov r2, pc" below.
+ *
+ * return values:
+ * r0 (low 32-bit): pc of the chaining cell corresponding to the resolved case,
+ *    including default which is placed at MIN(size, MAX_CHAINED_SWITCH_CASES).
+ * r1 (high 32-bit): the branch offset of the matching case (only for indexes
+ *    at or above MAX_CHAINED_SWITCH_CASES; 0 otherwise).
+ *
+ * Instructions around the call are:
+ *
+ * mov r2, pc
+ * blx r4PC                  (r4PC holds &findPackedSwitchIndex)
+ * mov pc, r0
+ * .align 4
+ * chaining cell for case 0 [8 bytes]
+ * chaining cell for case 1 [8 bytes]
+ *               :
+ * chaining cell for case MIN(size, MAX_CHAINED_SWITCH_CASES)-1 [8 bytes]
+ * chaining cell for case default [8 bytes]
+ * noChain exit
+ */
+s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc)
+{
+    int size;
+    int firstKey;
+    const int *entries;
+    u4 index;
+    int jumpIndex;
+    int caseDPCOffset = 0;
+    /* In Thumb mode pc is 4 ahead of the "mov r2, pc" instruction */
+    int chainingPC = (pc + 4) & ~3;
+
+    /*
+     * Packed switch data format:
+     *  ushort ident = 0x0100   magic value
+     *  ushort size             number of entries in the table
+     *  int first_key           first (and lowest) switch case value
+     *  int targets[size]       branch targets, relative to switch opcode
+     *
+     * Total size is (4+size*2) 16-bit code units.
+     */
+    size = switchData[1];
+    assert(size > 0);
+    /* Assemble in u4: shifting an int into the sign bit is undefined */
+    firstKey = (int) ((u4) switchData[2] | ((u4) switchData[3] << 16));
+
+    /* The entries are guaranteed to be aligned on a 32-bit boundary;
+     * we can treat them as a native int array.
+     */
+    entries = (const int*) &switchData[4];
+    assert(((u4)entries & 0x3) == 0);
+
+    /* Subtract as u4 so the difference wraps instead of overflowing; a
+     * negative difference becomes a large index that takes the default case */
+    index = (u4) testVal - (u4) firstKey;
+    if (index >= (u4) size) {
+        /* Jump to the default cell */
+        jumpIndex = MIN(size, MAX_CHAINED_SWITCH_CASES);
+    } else if (index >= MAX_CHAINED_SWITCH_CASES) {
+        /* Jump to the non-chaining exit point */
+        jumpIndex = MAX_CHAINED_SWITCH_CASES + 1;
+        caseDPCOffset = entries[index];
+    } else {
+        /* Jump to the inline chaining cell */
+        jumpIndex = (int) index;
+    }
+    chainingPC += jumpIndex * 8;
+    /* Zero-extend each half so a set sign bit cannot corrupt the other */
+    return (s8) (((u8) (u4) caseDPCOffset << 32) | (u4) chainingPC);
+}
+
+/*
+ * Sparse-switch variant of findPackedSwitchIndex; see its comments for the
+ * calling sequence, chaining cell layout, and r0/r1 return convention.
+ */
+s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc)
+{
+    int size;
+    const int *keys;
+    const int *entries;
+    int chainingPC = (pc + 4) & ~3;
+    int jumpIndex;
+    int caseDPCOffset = 0;
+    int i;
+
+    /*
+     * Sparse switch data format:
+     *  ushort ident = 0x0200   magic value
+     *  ushort size             number of entries in the table; > 0
+     *  int keys[size]          keys, sorted low-to-high; 32-bit aligned
+     *  int targets[size]       branch targets, relative to switch opcode
+     *
+     * Total size is (2+size*4) 16-bit code units.
+     */
+    size = switchData[1];
+    assert(size > 0);
+
+    /* The keys and the entries are guaranteed to be aligned on a 32-bit
+     * boundary; we can treat them as native int arrays.
+     */
+    keys = (const int*) &switchData[2];
+    assert(((u4)keys & 0x3) == 0);
+    entries = keys + size;
+    assert(((u4)entries & 0x3) == 0);
+
+    /*
+     * Scan the keys, which are guaranteed to be sorted low-to-high, and stop
+     * at the first key above testVal; without a match the default cell is
+     * used. Most tables have 3-4 entries and few have more than 10, so a
+     * binary search here is probably not useful.
+     */
+    jumpIndex = MIN(size, MAX_CHAINED_SWITCH_CASES);
+    for (i = 0; i < size && keys[i] <= testVal; i++) {
+        if (keys[i] == testVal) {
+            /* MAX_CHAINED_SWITCH_CASES + 1 is the start of the overflow case */
+            jumpIndex = (i < MAX_CHAINED_SWITCH_CASES) ?
+                        i : MAX_CHAINED_SWITCH_CASES + 1;
+            caseDPCOffset = entries[i];
+            break;
+        }
+    }
+
+    chainingPC += jumpIndex * 8;
+    /* Zero-extend each half so a set sign bit cannot corrupt the other */
+    return (s8) (((u8) (u4) caseDPCOffset << 32) | (u4) chainingPC);
+}
+
 static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir)
 {
     OpCode dalvikOpCode = mir->dalvikInsn.opCode;
@@ -3296,8 +3425,8 @@
             genExportPC(cUnit, mir);
             loadValueDirectFixed(cUnit, rlSrc, r0);
             loadConstant(cUnit, r2, (int)dvmInterpHandleFillArrayData);
-            loadConstant(cUnit, r1, (mir->dalvikInsn.vB << 1) +
-                 (int) (cUnit->method->insns + mir->offset));
+            loadConstant(cUnit, r1,
+               (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB));
             opReg(cUnit, kOpBlx, r2);
             clobberCallRegs(cUnit);
             /* generate a branch over if successful */
@@ -3312,11 +3441,9 @@
             break;
         }
         /*
-         * TODO
-         * - Add a 1 to 3-entry per-location cache here to completely
-         *   bypass the dvmInterpHandle[Packed/Sparse]Switch call w/ chaining
-         * - Use out-of-line handlers for both of these.  These ops
-         *   handle their own register allocation.
+         * Compute the goto target of up to
+         * MIN(switchSize, MAX_CHAINED_SWITCH_CASES) + 1 chaining cells.
+         * See the comment before findPackedSwitchIndex for the code layout.
          */
         case OP_PACKED_SWITCH:
         case OP_SPARSE_SWITCH: {
@@ -3324,23 +3451,24 @@
             flushAllRegs(cUnit);   /* Send everything to home location */
             loadValueDirectFixed(cUnit, rlSrc, r1);
             lockAllTemps(cUnit);
-            // Exit to the interpreter, setting up r4PC
+            const u2 *switchData =
+                cUnit->method->insns + mir->offset + mir->dalvikInsn.vB;
+            u2 size = switchData[1];
+
             if (dalvikOpCode == OP_PACKED_SWITCH) {
-                loadConstant(cUnit, r4PC, (int)dvmInterpHandlePackedSwitch);
+                loadConstant(cUnit, r4PC, (int)findPackedSwitchIndex);
             } else {
-                loadConstant(cUnit, r4PC, (int)dvmInterpHandleSparseSwitch);
+                loadConstant(cUnit, r4PC, (int)findSparseSwitchIndex);
             }
-            loadConstant(cUnit, r0, (mir->dalvikInsn.vB << 1) +
-                 (int) (cUnit->method->insns + mir->offset));
+            /* r0 <- Addr of the switch data */
+            loadConstant(cUnit, r0,
+               (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB));
+            /* r2 <- pc of the instruction following the blx */
+            opRegReg(cUnit, kOpMov, r2, rpc);
             opReg(cUnit, kOpBlx, r4PC);
             clobberCallRegs(cUnit);
-            loadConstant(cUnit, r1, (int)(cUnit->method->insns + mir->offset));
-            loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
-                         jitToInterpEntries.dvmJitToInterpNoChain), r2);
-            opRegReg(cUnit, kOpAdd, r0, r0);
-            opRegRegReg(cUnit, kOpAdd, r4PC, r0, r1);
-            opReg(cUnit, kOpBlx, r2);
-            clobberCallRegs(cUnit);
+            /* pc <- computed goto target */
+            opRegReg(cUnit, kOpMov, rpc, r0);
             break;
         }
         default:
@@ -4532,6 +4660,22 @@
         }
     }
 
+    /*
+     * Generate the branch to the dvmJitToInterpNoChain entry point at the end
+     * of all chaining cells for the overflow cases.
+     */
+    if (cUnit->switchOverflowPad) {
+        loadConstant(cUnit, r0, (int) cUnit->switchOverflowPad);
+        loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+                     jitToInterpEntries.dvmJitToInterpNoChain), r2);
+        opRegReg(cUnit, kOpAdd, r1, r1);
+        opRegRegReg(cUnit, kOpAdd, r4PC, r0, r1);
+#if defined(EXIT_STATS)
+        loadConstant(cUnit, r0, kSwitchOverflow);
+#endif
+        opReg(cUnit, kOpBlx, r2);
+    }
+
     dvmCompilerApplyGlobalOptimizations(cUnit);
 }
 
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
index ec37bf9..88b38e0 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
@@ -48,4 +48,7 @@
     str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
 
     @ Start executing the callee
+#if defined(EXIT_STATS)
+    mov     r0, #kInlineCacheMiss
+#endif
     mov     pc, r10                         @ dvmJitToInterpNoChain
diff --git a/vm/compiler/template/armv5te/TEMPLATE_RETURN.S b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
index 1e4aad2..1c85b19 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
@@ -23,21 +23,24 @@
 #else
     blxeq   lr                          @ punt to interpreter and compare state
 #endif
-    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
     mov     rFP, r10                    @ publish new FP
     ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
     ldr     r8, [r8]                    @ r8<- suspendCount
 
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
-    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
-    str     r1, [rGLUE, #offGlue_methodClassDex]
+    str     r0, [rGLUE, #offGlue_methodClassDex]
     cmp     r8, #0                      @ check the suspendCount
     movne   r9, #0                      @ clear the chaining cell address
     cmp     r9, #0                      @ chaining cell exists?
     blxne   r9                          @ jump to the chaining cell
-    mov     pc, r0                      @ callsite is interpreted
+#if defined(EXIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ callsite is interpreted
 1:
     stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index 797a2fc..9ea4b04 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -194,21 +194,24 @@
 #else
     blxeq   lr                          @ punt to interpreter and compare state
 #endif
-    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
     mov     rFP, r10                    @ publish new FP
     ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
     ldr     r8, [r8]                    @ r8<- suspendCount
 
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
-    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
-    str     r1, [rGLUE, #offGlue_methodClassDex]
+    str     r0, [rGLUE, #offGlue_methodClassDex]
     cmp     r8, #0                      @ check the suspendCount
     movne   r9, #0                      @ clear the chaining cell address
     cmp     r9, #0                      @ chaining cell exists?
     blxne   r9                          @ jump to the chaining cell
-    mov     pc, r0                      @ callsite is interpreted
+#if defined(EXIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ callsite is interpreted
 1:
     stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
@@ -271,6 +274,9 @@
     str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
 
     @ Start executing the callee
+#if defined(EXIT_STATS)
+    mov     r0, #kInlineCacheMiss
+#endif
     mov     pc, r10                         @ dvmJitToInterpNoChain
 
 /* ------------------------------ */
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index 340b05d..75388fb 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -194,21 +194,24 @@
 #else
     blxeq   lr                          @ punt to interpreter and compare state
 #endif
-    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
     mov     rFP, r10                    @ publish new FP
     ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
     ldr     r8, [r8]                    @ r8<- suspendCount
 
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
-    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
-    str     r1, [rGLUE, #offGlue_methodClassDex]
+    str     r0, [rGLUE, #offGlue_methodClassDex]
     cmp     r8, #0                      @ check the suspendCount
     movne   r9, #0                      @ clear the chaining cell address
     cmp     r9, #0                      @ chaining cell exists?
     blxne   r9                          @ jump to the chaining cell
-    mov     pc, r0                      @ callsite is interpreted
+#if defined(EXIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ callsite is interpreted
 1:
     stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
@@ -271,6 +274,9 @@
     str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
 
     @ Start executing the callee
+#if defined(EXIT_STATS)
+    mov     r0, #kInlineCacheMiss
+#endif
     mov     pc, r10                         @ dvmJitToInterpNoChain
 
 /* ------------------------------ */
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index fca0d67..c59b9b0 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -194,21 +194,24 @@
 #else
     blxeq   lr                          @ punt to interpreter and compare state
 #endif
-    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
+    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
     mov     rFP, r10                    @ publish new FP
     ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
     ldr     r8, [r8]                    @ r8<- suspendCount
 
     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
-    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
+    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
-    str     r1, [rGLUE, #offGlue_methodClassDex]
+    str     r0, [rGLUE, #offGlue_methodClassDex]
     cmp     r8, #0                      @ check the suspendCount
     movne   r9, #0                      @ clear the chaining cell address
     cmp     r9, #0                      @ chaining cell exists?
     blxne   r9                          @ jump to the chaining cell
-    mov     pc, r0                      @ callsite is interpreted
+#if defined(EXIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ callsite is interpreted
 1:
     stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
@@ -271,6 +274,9 @@
     str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
 
     @ Start executing the callee
+#if defined(EXIT_STATS)
+    mov     r0, #kInlineCacheMiss
+#endif
     mov     pc, r10                         @ dvmJitToInterpNoChain
 
 /* ------------------------------ */
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index fc770d7..aaeeff6 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -419,21 +419,21 @@
 
 #if defined(EXIT_STATS)
 /* Convenience function to increment counter from assembly code */
-void dvmBumpNoChain()
+void dvmBumpNoChain(int from)
 {
-    gDvm.jitNoChainExit++;
+    gDvmJit.noChainExit[from]++;
 }
 
 /* Convenience function to increment counter from assembly code */
 void dvmBumpNormal()
 {
-    gDvm.jitNormalExit++;
+    gDvmJit.normalExit++;
 }
 
 /* Convenience function to increment counter from assembly code */
 void dvmBumpPunt(int from)
 {
-    gDvm.jitPuntExit++;
+    gDvmJit.puntExit++;
 }
 #endif
 
@@ -461,9 +461,14 @@
          gDvmJit.threshold, gDvmJit.blockingMode ? "Blocking" : "Non-blocking");
 #if defined(EXIT_STATS)
         LOGD(
-         "JIT: Lookups: %d hits, %d misses; %d NoChain, %d normal, %d punt",
+         "JIT: Lookups: %d hits, %d misses; %d normal, %d punt",
          gDvmJit.addrLookupsFound, gDvmJit.addrLookupsNotFound,
-         gDvmJit.noChainExit, gDvmJit.normalExit, gDvmJit.puntExit);
+         gDvmJit.normalExit, gDvmJit.puntExit);
+        LOGD(
+         "JIT: noChainExit: %d IC miss, %d interp callsite, %d switch overflow",
+         gDvmJit.noChainExit[kInlineCacheMiss],
+         gDvmJit.noChainExit[kCallsiteInterpreted],
+         gDvmJit.noChainExit[kSwitchOverflow]);
 #endif
         LOGD("JIT: %d Translation chains", gDvmJit.translationChains);
 #if defined(INVOKE_STATS)
@@ -543,6 +548,19 @@
             if (lastPC == NULL) break;
             /* Grow the trace around the last PC if jitState is kJitTSelect */
             dexDecodeInstruction(gDvm.instrFormat, lastPC, &decInsn);
+
+            /*
+             * Treat {PACKED,SPARSE}_SWITCH as trace-ending instructions due
+             * to the amount of space it takes to generate the chaining
+             * cells.
+             */
+            if (interpState->totalTraceLen != 0 &&
+                (decInsn.opCode == OP_PACKED_SWITCH ||
+                 decInsn.opCode == OP_SPARSE_SWITCH)) {
+                interpState->jitState = kJitTSelectEnd;
+                break;
+            }
+
 #if defined(SHOW_TRACE)
             LOGD("TraceGen: adding %s",getOpcodeName(decInsn.opCode));
 #endif
diff --git a/vm/mterp/common/asm-constants.h b/vm/mterp/common/asm-constants.h
index bb6246b..e7efc6b 100644
--- a/vm/mterp/common/asm-constants.h
+++ b/vm/mterp/common/asm-constants.h
@@ -235,6 +235,14 @@
 MTERP_CONSTANT(STRING_FIELDOFF_OFFSET,   16)
 MTERP_CONSTANT(STRING_FIELDOFF_COUNT,    20)
 
+/*
+ * Reasons for the non-chaining interpreter entry points
+ * Enums defined in vm/Globals.h
+ */
+MTERP_CONSTANT(kInlineCacheMiss,        0)
+MTERP_CONSTANT(kCallsiteInterpreted,    1)
+MTERP_CONSTANT(kSwitchOverflow,         2)
+
 /* ClassObject fields */
 MTERP_OFFSET(offClassObject_descriptor, ClassObject, descriptor, 24)
 MTERP_OFFSET(offClassObject_accessFlags, ClassObject, accessFlags, 32)