Progress on live-precise GC.

This implements computation of register liveness.  This is still a work
in progress.  The computation is disabled by default, and when enabled
it's not yet used during the generation of register maps.  The code
has not been thoughly tested.

While working on this I fiddled around with the verifier's verbose
debugging stuff a bit.

This also changes some stuff in BitVector.  Unsigned ints are now
prevalent, and functions like dvmSetBit abort rather than returning a
boolean value when an illegal operation is attempted.  (Some parallel
functions in the compiler were also updated.)

Bug 2534655

Change-Id: Iea161c6d63a310e1dbdac2aeeb7b7aeadda8807c
diff --git a/vm/BitVector.c b/vm/BitVector.c
index 04c3d3f..b3d36ca 100644
--- a/vm/BitVector.c
+++ b/vm/BitVector.c
@@ -28,13 +28,12 @@
  * Allocate a bit vector with enough space to hold at least the specified
  * number of bits.
  */
-BitVector* dvmAllocBitVector(int startBits, bool expandable)
+BitVector* dvmAllocBitVector(unsigned int startBits, bool expandable)
 {
     BitVector* bv;
-    int count;
+    unsigned int count;
 
     assert(sizeof(bv->storage[0]) == 4);        /* assuming 32-bit units */
-    assert(startBits >= 0);
 
     bv = (BitVector*) malloc(sizeof(BitVector));
 
@@ -68,7 +67,7 @@
  */
 int dvmAllocBit(BitVector* pBits)
 {
-    int word, bit;
+    unsigned int word, bit;
 
 retry:
     for (word = 0; word < pBits->storageSize; word++) {
@@ -77,7 +76,7 @@
              * There are unallocated bits in this word.  Return the first.
              */
             bit = ffs(~(pBits->storage[word])) -1;
-            assert(bit >= 0 && bit < 32);
+            assert(bit < 32);
             pBits->storage[word] |= 1 << bit;
             return (word << 5) | bit;
         }
@@ -99,36 +98,38 @@
 
 /*
  * Mark the specified bit as "set".
- *
- * Returns "false" if the bit is outside the range of the vector and we're
- * not allowed to expand.
  */
-bool dvmSetBit(BitVector* pBits, int num)
+void dvmSetBit(BitVector* pBits, unsigned int num)
 {
-    assert(num >= 0);
-    if (num >= pBits->storageSize * (int)sizeof(u4) * 8) {
-        if (!pBits->expandable)
-            return false;
+    if (num >= pBits->storageSize * sizeof(u4) * 8) {
+        if (!pBits->expandable) {
+            LOGE("Attempt to set bit outside valid range (%d, limit is %d)\n",
+                num, pBits->storageSize * sizeof(u4) * 8);
+            dvmAbort();
+        }
 
         /* Round up to word boundaries for "num+1" bits */
-        int newSize = (num + 1 + 31) >> 5;
+        unsigned int newSize = (num + 1 + 31) >> 5;
         assert(newSize > pBits->storageSize);
         pBits->storage = (u4*)realloc(pBits->storage, newSize * sizeof(u4));
+        if (pBits->storage == NULL) {
+            LOGE("BitVector expansion to %d failed\n", newSize * sizeof(u4));
+            dvmAbort();
+        }
         memset(&pBits->storage[pBits->storageSize], 0x00,
             (newSize - pBits->storageSize) * sizeof(u4));
         pBits->storageSize = newSize;
     }
 
     pBits->storage[num >> 5] |= 1 << (num & 0x1f);
-    return true;
 }
 
 /*
  * Mark the specified bit as "clear".
  */
-void dvmClearBit(BitVector* pBits, int num)
+void dvmClearBit(BitVector* pBits, unsigned int num)
 {
-    assert(num >= 0 && num < (int) pBits->storageSize * (int)sizeof(u4) * 8);
+    assert(num < pBits->storageSize * sizeof(u4) * 8);
 
     pBits->storage[num >> 5] &= ~(1 << (num & 0x1f));
 }
@@ -138,7 +139,7 @@
  */
 void dvmClearAllBits(BitVector* pBits)
 {
-    int count = pBits->storageSize;
+    unsigned int count = pBits->storageSize;
     memset(pBits->storage, 0, count * sizeof(u4));
 }
 
@@ -147,27 +148,27 @@
  * since there might be unused bits - setting those to one will confuse the
  * iterator.
  */
-void dvmSetInitialBits(BitVector* pBits, int numBits)
+void dvmSetInitialBits(BitVector* pBits, unsigned int numBits)
 {
-    int i;
+    unsigned int idx;
     assert(((numBits + 31) >> 5) <= pBits->storageSize);
-    for (i = 0; i < (numBits >> 5); i++) {
-        pBits->storage[i] = -1;
+    for (idx = 0; idx < (numBits >> 5); idx++) {
+        pBits->storage[idx] = -1;
     }
-    int remNumBits = numBits & 0x1f;
+    unsigned int remNumBits = numBits & 0x1f;
     if (remNumBits) {
-        pBits->storage[i] = (1 << remNumBits) - 1;
+        pBits->storage[idx] = (1 << remNumBits) - 1;
     }
 }
 
 /*
  * Determine whether or not the specified bit is set.
  */
-bool dvmIsBitSet(const BitVector* pBits, int num)
+bool dvmIsBitSet(const BitVector* pBits, unsigned int num)
 {
-    assert(num >= 0 && num < (int) pBits->storageSize * (int)sizeof(u4) * 8);
+    assert(num < pBits->storageSize * sizeof(u4) * 8);
 
-    int val = pBits->storage[num >> 5] & (1 << (num & 0x1f));
+    unsigned int val = pBits->storage[num >> 5] & (1 << (num & 0x1f));
     return (val != 0);
 }
 
@@ -176,8 +177,8 @@
  */
 int dvmCountSetBits(const BitVector* pBits)
 {
-    int word;
-    int count = 0;
+    unsigned int word;
+    unsigned int count = 0;
 
     for (word = 0; word < pBits->storageSize; word++) {
         u4 val = pBits->storage[word];
@@ -199,16 +200,27 @@
 }
 
 /*
- * Copy a whole vector to the other. Only do that when the both vectors have
- * the same size and attribute.
+ * If the vector sizes don't match, log an error and abort.
  */
-bool dvmCopyBitVector(BitVector *dest, const BitVector *src)
+static void checkSizes(const BitVector* bv1, const BitVector* bv2)
 {
-    if (dest->storageSize != src->storageSize ||
-        dest->expandable != src->expandable)
-        return false;
+    if (bv1->storageSize != bv2->storageSize) {
+        LOGE("Mismatched vector sizes (%d, %d)\n",
+            bv1->storageSize, bv2->storageSize);
+        dvmAbort();
+    }
+}
+
+/*
+ * Copy a whole vector to the other. Only do that when the both vectors have
+ * the same size.
+ */
+void dvmCopyBitVector(BitVector *dest, const BitVector *src)
+{
+    /* if dest is expandable and < src, we could expand dest to match */
+    checkSizes(dest, src);
+
     memcpy(dest->storage, src->storage, sizeof(u4) * dest->storageSize);
-    return true;
 }
 
 /*
@@ -223,9 +235,9 @@
         dest->expandable != src2->expandable)
         return false;
 
-    int i;
-    for (i = 0; i < dest->storageSize; i++) {
-        dest->storage[i] = src1->storage[i] & src2->storage[i];
+    unsigned int idx;
+    for (idx = 0; idx < dest->storageSize; idx++) {
+        dest->storage[idx] = src1->storage[idx] & src2->storage[idx];
     }
     return true;
 }
@@ -242,9 +254,9 @@
         dest->expandable != src2->expandable)
         return false;
 
-    int i;
-    for (i = 0; i < dest->storageSize; i++) {
-        dest->storage[i] = src1->storage[i] | src2->storage[i];
+    unsigned int idx;
+    for (idx = 0; idx < dest->storageSize; idx++) {
+        dest->storage[idx] = src1->storage[idx] | src2->storage[idx];
     }
     return true;
 }
@@ -258,9 +270,9 @@
         src1->expandable != src2->expandable)
         return true;
 
-    int i;
-    for (i = 0; i < src1->storageSize; i++) {
-        if (src1->storage[i] != src2->storage[i]) return true;
+    unsigned int idx;
+    for (idx = 0; idx < src1->storageSize; idx++) {
+        if (src1->storage[idx] != src2->storage[idx]) return true;
     }
     return false;
 }
@@ -283,8 +295,8 @@
     if (bitIndex >= iterator->bitSize) return -1;
 
     for (; bitIndex < iterator->bitSize; bitIndex++) {
-        int wordIndex = bitIndex >> 5;
-        int mask = 1 << (bitIndex & 0x1f);
+        unsigned int wordIndex = bitIndex >> 5;
+        unsigned int mask = 1 << (bitIndex & 0x1f);
         if (pBits->storage[wordIndex] & mask) {
             iterator->idx = bitIndex+1;
             return bitIndex;
@@ -293,3 +305,28 @@
     /* No more set bits */
     return -1;
 }
+
+
+/*
+ * Merge the contents of "src" into "dst", checking to see if this causes
+ * any changes to occur.  This is a logical OR.
+ *
+ * Returns "true" if the contents of the destination vector were modified.
+ */
+bool dvmCheckMergeBitVectors(BitVector* dst, const BitVector* src)
+{
+    bool changed = false;
+
+    checkSizes(dst, src);
+
+    unsigned int idx;
+    for (idx = 0; idx < dst->storageSize; idx++) {
+        u4 merged = src->storage[idx] | dst->storage[idx];
+        if (dst->storage[idx] != merged) {
+            dst->storage[idx] = merged;
+            changed = true;
+        }
+    }
+
+    return changed;
+}
diff --git a/vm/BitVector.h b/vm/BitVector.h
index 3c926b1..d1a0ca3 100644
--- a/vm/BitVector.h
+++ b/vm/BitVector.h
@@ -28,7 +28,7 @@
  */
 typedef struct BitVector {
     bool    expandable;     /* expand bitmap if we run out? */
-    int     storageSize;    /* current size, in 32-bit words */
+    u4      storageSize;    /* current size, in 32-bit words */
     u4*     storage;
 } BitVector;
 
@@ -40,7 +40,7 @@
 } BitVectorIterator;
 
 /* allocate a bit vector with enough space to hold "startBits" bits */
-BitVector* dvmAllocBitVector(int startBits, bool expandable);
+BitVector* dvmAllocBitVector(unsigned int startBits, bool expandable);
 void dvmFreeBitVector(BitVector* pBits);
 
 /*
@@ -49,24 +49,25 @@
  * returns -1.
  *
  * dvmSetBit sets the specified bit, expanding the vector if necessary
- * (and possible).
+ * (and possible).  Attempting to set a bit past the limit of a non-expandable
+ * bit vector will cause a fatal error.
  *
  * dvmSetInitialBits sets all bits in [0..numBits-1]. Won't expand the vector.
  *
  * dvmIsBitSet returns "true" if the bit is set.
  */
 int dvmAllocBit(BitVector* pBits);
-bool dvmSetBit(BitVector* pBits, int num);
-void dvmClearBit(BitVector* pBits, int num);
+void dvmSetBit(BitVector* pBits, unsigned int num);
+void dvmClearBit(BitVector* pBits, unsigned int num);
 void dvmClearAllBits(BitVector* pBits);
-void dvmSetInitialBits(BitVector* pBits, int numBits);
-bool dvmIsBitSet(const BitVector* pBits, int num);
+void dvmSetInitialBits(BitVector* pBits, unsigned int numBits);
+bool dvmIsBitSet(const BitVector* pBits, unsigned int num);
 
 /* count the number of bits that have been set */
 int dvmCountSetBits(const BitVector* pBits);
 
-/* copy one vector to the other compatible one */
-bool dvmCopyBitVector(BitVector *dest, const BitVector *src);
+/* copy one vector to another of equal size */
+void dvmCopyBitVector(BitVector *dest, const BitVector *src);
 
 /*
  * Intersect two bit vectors and store the result to the dest vector.
@@ -81,6 +82,14 @@
                         const BitVector *src2);
 
 /*
+ * Merge the contents of "src" into "dst", checking to see if this causes
+ * any changes to occur.
+ *
+ * Returns "true" if the contents of the destination vector were modified.
+ */
+bool dvmCheckMergeBitVectors(BitVector* dst, const BitVector* src);
+
+/*
  * Compare two bit vectors and return true if difference is seen.
  */
 bool dvmCompareBitVectors(const BitVector *src1, const BitVector *src2);
diff --git a/vm/Dvm.mk b/vm/Dvm.mk
index 4711ffd..dcd70be 100644
--- a/vm/Dvm.mk
+++ b/vm/Dvm.mk
@@ -136,6 +136,7 @@
 	alloc/DdmHeap.c \
 	alloc/Verify.c \
 	alloc/Visit.c \
+	analysis/BackwardFlow.c \
 	analysis/CodeVerify.c \
 	analysis/DexPrepare.c \
 	analysis/DexVerify.c \
diff --git a/vm/analysis/BackwardFlow.c b/vm/analysis/BackwardFlow.c
new file mode 100644
index 0000000..d498410
--- /dev/null
+++ b/vm/analysis/BackwardFlow.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Backward analysis for Dalvik bytecode.
+ */
+#include "Dalvik.h"
+#include "analysis/BackwardFlow.h"
+#include "analysis/CodeVerify.h"
+
+static bool processInstruction(VerifierData* vdata, u4 curIdx,
+    BitVector* workBits);
+static void dumpLiveState(const VerifierData* vdata, u4 curIdx,
+    const BitVector* workBits);
+
+
+/*
+ * Create a table of instruction widths that indicate the width of the
+ * *previous* instruction.  The values are copied from the width table
+ * in "vdata", not derived from the instruction stream.
+ *
+ * Caller must free the return value.
+ */
+static InstructionWidth* createBackwardWidthTable(VerifierData* vdata)
+{
+    InstructionWidth* widths;
+
+    widths = (InstructionWidth*)
+            calloc(vdata->insnsSize, sizeof(InstructionWidth));
+    if (widths == NULL)
+        return NULL;
+
+    unsigned int idx;
+    u4 insnWidth = 0;
+    for (idx = 0; idx < vdata->insnsSize; ) {
+        widths[idx] = insnWidth;
+        insnWidth = dvmInsnGetWidth(vdata->insnFlags, idx);
+        idx += insnWidth;
+    }
+
+    return widths;
+}
+
+/*
+ * Compute the "liveness" of every register at all GC points.
+ */
+bool dvmComputeLiveness(VerifierData* vdata)
+{
+    const InsnFlags* insnFlags = vdata->insnFlags;
+    InstructionWidth* backwardWidth;
+    VfyBasicBlock* startGuess = NULL;
+    BitVector* workBits;
+    bool result = false;
+
+    bool verbose = false; //= dvmWantVerboseVerification(vdata->method);
+    if (verbose) {
+        const Method* meth = vdata->method;
+        LOGI("Computing liveness for %s.%s:%s\n",
+            meth->clazz->descriptor, meth->name, meth->shorty);
+    }
+
+    assert(vdata->registerLines != NULL);
+
+    backwardWidth = createBackwardWidthTable(vdata);
+    if (backwardWidth == NULL)
+        goto bail;
+
+    /*
+     * Allocate space for intra-block work set.  Does not include space
+     * for method result "registers", which aren't visible to the GC.
+     * (They would be made live by move-result and then die on the
+     * instruction immediately before it.)
+     */
+    workBits = dvmAllocBitVector(vdata->insnRegCount, false);
+    if (workBits == NULL)
+        goto bail;
+
+    /*
+     * We continue until all blocks have been visited, and no block
+     * requires further attention ("visited" is set and "changed" is
+     * clear).
+     *
+     * TODO: consider creating a "dense" array of basic blocks to make
+     * the walking faster.
+     */
+    int iter = 0;
+    while (true) {
+        VfyBasicBlock* workBlock = NULL;
+        unsigned int idx;
+
+        if (iter++ > 100000) {
+            LOG_VFY_METH(vdata->method, "oh dear");
+            dvmAbort();
+        }
+
+        /*
+         * If a block is marked "changed", we stop and handle it.  If it
+         * just hasn't been visited yet, we remember it but keep searching
+         * for one that has been changed.
+         *
+         * The thought here is that this is more likely to let us work
+         * from end to start, which reduces the amount of re-evaluation
+         * required (both by using "changed" as a work list, and by picking
+         * un-visited blocks from the tail end of the method).
+         */
+        if (startGuess != NULL) {
+            assert(startGuess->changed);
+            workBlock = startGuess;
+        } else {
+            for (idx = 0; idx < vdata->insnsSize; idx++) {
+                VfyBasicBlock* block = vdata->basicBlocks[idx];
+                if (block == NULL)
+                    continue;
+
+                if (block->changed) {
+                    workBlock = block;
+                    break;
+                } else if (!block->visited) {
+                    workBlock = block;
+                }
+            }
+        }
+
+        if (workBlock == NULL) {
+            /* all done */
+            break;
+        }
+
+        assert(workBlock->changed || !workBlock->visited);
+        startGuess = NULL;
+
+        /*
+         * Load work bits.  These represent the liveness of registers
+         * after the last instruction in the block has finished executing.
+         */
+        assert(workBlock->liveRegs != NULL);
+        dvmCopyBitVector(workBits, workBlock->liveRegs);
+        if (verbose) {
+            LOGI("Loaded work bits from last=0x%04x\n", workBlock->lastAddr);
+            dumpLiveState(vdata, 0xfffd, workBlock->liveRegs);
+            dumpLiveState(vdata, 0xffff, workBits);
+        }
+
+        /*
+         * Process a single basic block.
+         *
+         * If this instruction is a GC point, we want to save the result
+         * in the RegisterLine.
+         *
+         * We don't break basic blocks on every GC point -- in particular,
+         * instructions that might throw but have no "try" block don't
+         * end a basic block -- so there could be more than one GC point
+         * in a given basic block.
+         *
+         * We could change this, but it turns out to be not all that useful.
+         * At first glance it appears that we could share the liveness bit
+         * vector between the basic block struct and the register line,
+         * but the basic block needs to reflect the state *after* the
+         * instruction has finished, while the GC points need to describe
+         * the state before the instruction starts.
+         */
+        u4 curIdx = workBlock->lastAddr;
+        while (true) {
+            if (!processInstruction(vdata, curIdx, workBits))
+                goto bail;
+
+            if (verbose) {
+                dumpLiveState(vdata, curIdx + 0x8000, workBits);
+            }
+
+            if (dvmInsnIsGcPoint(insnFlags, curIdx)) {
+                BitVector* lineBits = vdata->registerLines[curIdx].liveRegs;
+                if (lineBits == NULL) {
+                    lineBits = vdata->registerLines[curIdx].liveRegs =
+                        dvmAllocBitVector(vdata->insnRegCount, false);
+                }
+                dvmCopyBitVector(lineBits, workBits);
+            }
+
+            if (curIdx == workBlock->firstAddr)
+                break;
+            assert(curIdx >= backwardWidth[curIdx]);
+            curIdx -= backwardWidth[curIdx];
+        }
+
+        workBlock->visited = true;
+        workBlock->changed = false;
+
+        if (verbose) {
+            dumpLiveState(vdata, curIdx, workBits);
+        }
+
+        /*
+         * Merge changes to all predecessors.  If the new bits don't match
+         * the old bits, set the "changed" flag.
+         */
+        PointerSet* preds = workBlock->predecessors;
+        size_t numPreds = dvmPointerSetGetCount(preds);
+        unsigned int predIdx;
+
+        for (predIdx = 0; predIdx < numPreds; predIdx++) {
+            VfyBasicBlock* pred =
+                    (VfyBasicBlock*) dvmPointerSetGetEntry(preds, predIdx);
+
+            pred->changed = dvmCheckMergeBitVectors(pred->liveRegs, workBits);
+            if (verbose) {
+                LOGI("merging cur=%04x into pred last=%04x (ch=%d)\n",
+                    curIdx, pred->lastAddr, pred->changed);
+                dumpLiveState(vdata, 0xfffa, pred->liveRegs);
+                dumpLiveState(vdata, 0xfffb, workBits);
+            }
+
+            /*
+             * We want to set the "changed" flag on unvisited predecessors
+             * as a way of guiding the verifier through basic blocks in
+             * a reasonable order.  We can't count on variable liveness
+             * changing, so we force "changed" to true even if it hasn't.
+             */
+            if (!pred->visited)
+                pred->changed = true;
+
+            /*
+             * Keep track of one of the changed blocks so we can start
+             * there instead of having to scan through the list.
+             */
+            if (pred->changed)
+                startGuess = pred;
+        }
+    }
+
+#ifndef NDEBUG
+    /*
+     * Sanity check: verify that all GC point register lines have a
+     * liveness bit vector allocated.  Also, we're not expecting non-GC
+     * points to have them.
+     */
+    u4 checkIdx;
+    for (checkIdx = 0; checkIdx < vdata->insnsSize; ) {
+        if (dvmInsnIsGcPoint(insnFlags, checkIdx)) {
+            if (vdata->registerLines[checkIdx].liveRegs == NULL) {
+                LOG_VFY_METH(vdata->method,
+                    "GLITCH: no liveRegs for GC point 0x%04x\n", checkIdx);
+                dvmAbort();
+            }
+        } else if (vdata->registerLines[checkIdx].liveRegs != NULL) {
+            LOG_VFY_METH(vdata->method,
+                "GLITCH: liveRegs for non-GC point 0x%04x\n", checkIdx);
+            dvmAbort();
+        }
+        u4 insnWidth = dvmInsnGetWidth(insnFlags, checkIdx);
+        checkIdx += insnWidth;
+    }
+#endif
+
+    result = true;
+
+bail:
+    free(backwardWidth);
+    return result;
+}
+
+
+/*
+ * Add a register to the LIVE set.
+ */
+static inline void GEN(BitVector* workBits, u4 regIndex)
+{
+    dvmSetBit(workBits, regIndex);
+}
+
+/*
+ * Add a register pair to the LIVE set.
+ */
+static inline void GENW(BitVector* workBits, u4 regIndex)
+{
+    dvmSetBit(workBits, regIndex);
+    dvmSetBit(workBits, regIndex+1);
+}
+
+/*
+ * Remove a register from the LIVE set.
+ */
+static inline void KILL(BitVector* workBits, u4 regIndex)
+{
+    dvmClearBit(workBits, regIndex);
+}
+
+/*
+ * Remove a register pair from the LIVE set.
+ */
+static inline void KILLW(BitVector* workBits, u4 regIndex)
+{
+    dvmClearBit(workBits, regIndex);
+    dvmClearBit(workBits, regIndex+1);
+}
+
+/*
+ * Process a single instruction.
+ *
+ * Returns "false" if something goes fatally wrong.
+ */
+static bool processInstruction(VerifierData* vdata, u4 insnIdx,
+    BitVector* workBits)
+{
+    const Method* meth = vdata->method;
+    const u2* insns = meth->insns + insnIdx;
+    DecodedInstruction decInsn;
+
+    dexDecodeInstruction(insns, &decInsn);
+
+    /*
+     * Add registers to the "GEN" or "KILL" sets.  We want to do KILL
+     * before GEN to handle cases where the source and destination
+     * register is the same.
+     */
+    switch (decInsn.opcode) {
+    case OP_NOP:
+    case OP_RETURN_VOID:
+    case OP_GOTO:
+    case OP_GOTO_16:
+    case OP_GOTO_32:
+        /* no registers are used */
+        break;
+
+    case OP_RETURN:
+    case OP_RETURN_OBJECT:
+    case OP_MONITOR_ENTER:
+    case OP_MONITOR_EXIT:
+    case OP_CHECK_CAST:
+    case OP_THROW:
+    case OP_PACKED_SWITCH:
+    case OP_SPARSE_SWITCH:
+    case OP_FILL_ARRAY_DATA:
+    case OP_IF_EQZ:
+    case OP_IF_NEZ:
+    case OP_IF_LTZ:
+    case OP_IF_GEZ:
+    case OP_IF_GTZ:
+    case OP_IF_LEZ:
+    case OP_SPUT:
+    case OP_SPUT_VOLATILE:
+    case OP_SPUT_BOOLEAN:
+    case OP_SPUT_BYTE:
+    case OP_SPUT_CHAR:
+    case OP_SPUT_SHORT:
+    case OP_SPUT_OBJECT:
+    case OP_SPUT_OBJECT_VOLATILE:
+        /* action <- vA */
+        GEN(workBits, decInsn.vA);
+        break;
+
+    case OP_RETURN_WIDE:
+    case OP_SPUT_WIDE:
+    case OP_SPUT_WIDE_VOLATILE:
+        /* action <- vA(wide) */
+        GENW(workBits, decInsn.vA);
+        break;
+
+    case OP_IF_EQ:
+    case OP_IF_NE:
+    case OP_IF_LT:
+    case OP_IF_GE:
+    case OP_IF_GT:
+    case OP_IF_LE:
+    case OP_IPUT:
+    case OP_IPUT_VOLATILE:
+    case OP_IPUT_BOOLEAN:
+    case OP_IPUT_BYTE:
+    case OP_IPUT_CHAR:
+    case OP_IPUT_SHORT:
+    case OP_IPUT_OBJECT:
+    case OP_IPUT_OBJECT_VOLATILE:
+        /* action <- vA, vB */
+        GEN(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        break;
+
+    case OP_IPUT_WIDE:
+    case OP_IPUT_WIDE_VOLATILE:
+        /* action <- vA(wide), vB */
+        GENW(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        break;
+
+    case OP_APUT:
+    case OP_APUT_BOOLEAN:
+    case OP_APUT_BYTE:
+    case OP_APUT_CHAR:
+    case OP_APUT_SHORT:
+    case OP_APUT_OBJECT:
+        /* action <- vA, vB, vC */
+        GEN(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        GEN(workBits, decInsn.vC);
+        break;
+
+    case OP_APUT_WIDE:
+        /* action <- vA(wide), vB, vC */
+        GENW(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        GEN(workBits, decInsn.vC);
+        break;
+
+    case OP_FILLED_NEW_ARRAY:
+    case OP_INVOKE_VIRTUAL:
+    case OP_INVOKE_SUPER:
+    case OP_INVOKE_DIRECT:
+    case OP_INVOKE_STATIC:
+    case OP_INVOKE_INTERFACE:
+        /* action <- vararg */
+        {
+            unsigned int idx;
+            for (idx = 0; idx < decInsn.vA; idx++) {
+                GEN(workBits, decInsn.arg[idx]);
+            }
+        }
+        break;
+
+    case OP_FILLED_NEW_ARRAY_RANGE:
+    case OP_INVOKE_VIRTUAL_RANGE:
+    case OP_INVOKE_SUPER_RANGE:
+    case OP_INVOKE_DIRECT_RANGE:
+    case OP_INVOKE_STATIC_RANGE:
+    case OP_INVOKE_INTERFACE_RANGE:
+        /* action <- vararg/range */
+        {
+            unsigned int idx;
+            for (idx = 0; idx < decInsn.vA; idx++) {
+                GEN(workBits, decInsn.vC + idx);
+            }
+        }
+        break;
+
+    case OP_MOVE_RESULT:
+    case OP_MOVE_RESULT_WIDE:
+    case OP_MOVE_RESULT_OBJECT:
+    case OP_MOVE_EXCEPTION:
+    case OP_CONST_4:
+    case OP_CONST_16:
+    case OP_CONST:
+    case OP_CONST_HIGH16:
+    case OP_CONST_STRING:
+    case OP_CONST_STRING_JUMBO:
+    case OP_CONST_CLASS:
+    case OP_NEW_INSTANCE:
+    case OP_SGET:
+    case OP_SGET_VOLATILE:
+    case OP_SGET_BOOLEAN:
+    case OP_SGET_BYTE:
+    case OP_SGET_CHAR:
+    case OP_SGET_SHORT:
+    case OP_SGET_OBJECT:
+    case OP_SGET_OBJECT_VOLATILE:
+        /* vA <- value */
+        KILL(workBits, decInsn.vA);
+        break;
+
+    case OP_CONST_WIDE_16:
+    case OP_CONST_WIDE_32:
+    case OP_CONST_WIDE:
+    case OP_CONST_WIDE_HIGH16:
+    case OP_SGET_WIDE:
+    case OP_SGET_WIDE_VOLATILE:
+        /* vA(wide) <- value */
+        KILLW(workBits, decInsn.vA);
+        break;
+
+    case OP_MOVE:
+    case OP_MOVE_FROM16:
+    case OP_MOVE_16:
+    case OP_MOVE_OBJECT:
+    case OP_MOVE_OBJECT_FROM16:
+    case OP_MOVE_OBJECT_16:
+    case OP_INSTANCE_OF:
+    case OP_ARRAY_LENGTH:
+    case OP_NEW_ARRAY:
+    case OP_IGET:
+    case OP_IGET_VOLATILE:
+    case OP_IGET_BOOLEAN:
+    case OP_IGET_BYTE:
+    case OP_IGET_CHAR:
+    case OP_IGET_SHORT:
+    case OP_IGET_OBJECT:
+    case OP_IGET_OBJECT_VOLATILE:
+    case OP_NEG_INT:
+    case OP_NOT_INT:
+    case OP_NEG_FLOAT:
+    case OP_INT_TO_FLOAT:
+    case OP_FLOAT_TO_INT:
+    case OP_INT_TO_BYTE:
+    case OP_INT_TO_CHAR:
+    case OP_INT_TO_SHORT:
+    case OP_ADD_INT_LIT16:
+    case OP_RSUB_INT:
+    case OP_MUL_INT_LIT16:
+    case OP_DIV_INT_LIT16:
+    case OP_REM_INT_LIT16:
+    case OP_AND_INT_LIT16:
+    case OP_OR_INT_LIT16:
+    case OP_XOR_INT_LIT16:
+    case OP_ADD_INT_LIT8:
+    case OP_RSUB_INT_LIT8:
+    case OP_MUL_INT_LIT8:
+    case OP_DIV_INT_LIT8:
+    case OP_REM_INT_LIT8:
+    case OP_SHL_INT_LIT8:
+    case OP_SHR_INT_LIT8:
+    case OP_USHR_INT_LIT8:
+    case OP_AND_INT_LIT8:
+    case OP_OR_INT_LIT8:
+    case OP_XOR_INT_LIT8:
+        /* vA <- vB */
+        KILL(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        break;
+
+    case OP_IGET_WIDE:
+    case OP_IGET_WIDE_VOLATILE:
+    case OP_INT_TO_LONG:
+    case OP_INT_TO_DOUBLE:
+    case OP_FLOAT_TO_LONG:
+    case OP_FLOAT_TO_DOUBLE:
+        /* vA(wide) <- vB */
+        KILLW(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        break;
+
+    case OP_LONG_TO_INT:
+    case OP_LONG_TO_FLOAT:
+    case OP_DOUBLE_TO_INT:
+    case OP_DOUBLE_TO_FLOAT:
+        /* vA <- vB(wide) */
+        KILL(workBits, decInsn.vA);
+        GENW(workBits, decInsn.vB);
+        break;
+
+    case OP_MOVE_WIDE:
+    case OP_MOVE_WIDE_FROM16:
+    case OP_MOVE_WIDE_16:
+    case OP_NEG_LONG:
+    case OP_NOT_LONG:
+    case OP_NEG_DOUBLE:
+    case OP_LONG_TO_DOUBLE:
+    case OP_DOUBLE_TO_LONG:
+        /* vA(wide) <- vB(wide) */
+        KILLW(workBits, decInsn.vA);
+        GENW(workBits, decInsn.vB);
+        break;
+
+    case OP_CMPL_FLOAT:
+    case OP_CMPG_FLOAT:
+    case OP_AGET:
+    case OP_AGET_BOOLEAN:
+    case OP_AGET_BYTE:
+    case OP_AGET_CHAR:
+    case OP_AGET_SHORT:
+    case OP_AGET_OBJECT:
+    case OP_ADD_INT:
+    case OP_SUB_INT:
+    case OP_MUL_INT:
+    case OP_REM_INT:
+    case OP_DIV_INT:
+    case OP_AND_INT:
+    case OP_OR_INT:
+    case OP_XOR_INT:
+    case OP_SHL_INT:
+    case OP_SHR_INT:
+    case OP_USHR_INT:
+    case OP_ADD_FLOAT:
+    case OP_SUB_FLOAT:
+    case OP_MUL_FLOAT:
+    case OP_DIV_FLOAT:
+    case OP_REM_FLOAT:
+        /* vA <- vB, vC */
+        KILL(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        GEN(workBits, decInsn.vC);
+        break;
+
+    case OP_AGET_WIDE:
+        /* vA(wide) <- vB, vC */
+        KILLW(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        GEN(workBits, decInsn.vC);
+        break;
+
+    case OP_CMPL_DOUBLE:
+    case OP_CMPG_DOUBLE:
+    case OP_CMP_LONG:
+        /* vA <- vB(wide), vC(wide) */
+        KILL(workBits, decInsn.vA);
+        GENW(workBits, decInsn.vB);
+        GENW(workBits, decInsn.vC);
+        break;
+
+    case OP_SHL_LONG:
+    case OP_SHR_LONG:
+    case OP_USHR_LONG:
+        /* vA(wide) <- vB(wide), vC */
+        KILLW(workBits, decInsn.vA);
+        GENW(workBits, decInsn.vB);
+        GEN(workBits, decInsn.vC);
+        break;
+
+    case OP_ADD_LONG:
+    case OP_SUB_LONG:
+    case OP_MUL_LONG:
+    case OP_DIV_LONG:
+    case OP_REM_LONG:
+    case OP_AND_LONG:
+    case OP_OR_LONG:
+    case OP_XOR_LONG:
+    case OP_ADD_DOUBLE:
+    case OP_SUB_DOUBLE:
+    case OP_MUL_DOUBLE:
+    case OP_DIV_DOUBLE:
+    case OP_REM_DOUBLE:
+        /* vA(wide) <- vB(wide), vC(wide) */
+        KILLW(workBits, decInsn.vA);
+        GENW(workBits, decInsn.vB);
+        GENW(workBits, decInsn.vC);
+        break;
+
+    case OP_ADD_INT_2ADDR:
+    case OP_SUB_INT_2ADDR:
+    case OP_MUL_INT_2ADDR:
+    case OP_REM_INT_2ADDR:
+    case OP_SHL_INT_2ADDR:
+    case OP_SHR_INT_2ADDR:
+    case OP_USHR_INT_2ADDR:
+    case OP_AND_INT_2ADDR:
+    case OP_OR_INT_2ADDR:
+    case OP_XOR_INT_2ADDR:
+    case OP_DIV_INT_2ADDR:
+        /* vA <- vA, vB */
+        /* KILL(workBits, decInsn.vA); */
+        GEN(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        break;
+
+    case OP_SHL_LONG_2ADDR:
+    case OP_SHR_LONG_2ADDR:
+    case OP_USHR_LONG_2ADDR:
+        /* vA(wide) <- vA(wide), vB */
+        /* KILLW(workBits, decInsn.vA); */
+        GENW(workBits, decInsn.vA);
+        GEN(workBits, decInsn.vB);
+        break;
+
+    case OP_ADD_LONG_2ADDR:
+    case OP_SUB_LONG_2ADDR:
+    case OP_MUL_LONG_2ADDR:
+    case OP_DIV_LONG_2ADDR:
+    case OP_REM_LONG_2ADDR:
+    case OP_AND_LONG_2ADDR:
+    case OP_OR_LONG_2ADDR:
+    case OP_XOR_LONG_2ADDR:
+    case OP_ADD_FLOAT_2ADDR:
+    case OP_SUB_FLOAT_2ADDR:
+    case OP_MUL_FLOAT_2ADDR:
+    case OP_DIV_FLOAT_2ADDR:
+    case OP_REM_FLOAT_2ADDR:
+    case OP_ADD_DOUBLE_2ADDR:
+    case OP_SUB_DOUBLE_2ADDR:
+    case OP_MUL_DOUBLE_2ADDR:
+    case OP_DIV_DOUBLE_2ADDR:
+    case OP_REM_DOUBLE_2ADDR:
+        /* vA(wide) <- vA(wide), vB(wide) */
+        /* KILLW(workBits, decInsn.vA); */
+        GENW(workBits, decInsn.vA);
+        GENW(workBits, decInsn.vB);
+        break;
+
+    /* we will only see this if liveness analysis is done after general vfy */
+    case OP_THROW_VERIFICATION_ERROR:
+        /* no registers used */
+        break;
+
+    /* quickened instructions, not expected to appear */
+    case OP_EXECUTE_INLINE:
+    case OP_EXECUTE_INLINE_RANGE:
+    case OP_INVOKE_DIRECT_EMPTY:
+    case OP_IGET_QUICK:
+    case OP_IGET_WIDE_QUICK:
+    case OP_IGET_OBJECT_QUICK:
+    case OP_IPUT_QUICK:
+    case OP_IPUT_WIDE_QUICK:
+    case OP_IPUT_OBJECT_QUICK:
+    case OP_INVOKE_VIRTUAL_QUICK:
+    case OP_INVOKE_VIRTUAL_QUICK_RANGE:
+    case OP_INVOKE_SUPER_QUICK:
+    case OP_INVOKE_SUPER_QUICK_RANGE:
+    case OP_RETURN_VOID_BARRIER:
+        return false;
+
+    /* these should never appear during verification */
+    case OP_UNUSED_3E:
+    case OP_UNUSED_3F:
+    case OP_UNUSED_40:
+    case OP_UNUSED_41:
+    case OP_UNUSED_42:
+    case OP_UNUSED_43:
+    case OP_UNUSED_73:
+    case OP_UNUSED_79:
+    case OP_UNUSED_7A:
+    case OP_BREAKPOINT:
+    case OP_DISPATCH_FF:
+        return false;
+    }
+
+    return true;
+}
+
+
+/*
+ * Dump the liveness bits to the log.
+ *
+ * "curIdx" is for display only.
+ */
+static void dumpLiveState(const VerifierData* vdata, u4 curIdx,
+    const BitVector* workBits)
+{
+    u4 insnRegCount = vdata->insnRegCount;
+    size_t regCharSize = insnRegCount + (insnRegCount-1)/4 + 2 +1;
+    char regChars[regCharSize +1];
+    unsigned int idx;
+
+    memset(regChars, ' ', regCharSize);
+    regChars[0] = '[';
+    if (insnRegCount == 0)
+        regChars[1] = ']';
+    else
+        regChars[1 + (insnRegCount-1) + (insnRegCount-1)/4 +1] = ']';
+    regChars[regCharSize] = '\0';
+
+    for (idx = 0; idx < insnRegCount; idx++) {
+        char ch = dvmIsBitSet(workBits, idx) ? '+' : '-';
+        regChars[1 + idx + (idx/4)] = ch;
+    }
+
+    LOGI("0x%04x %s\n", curIdx, regChars);
+}
diff --git a/vm/analysis/BackwardFlow.h b/vm/analysis/BackwardFlow.h
new file mode 100644
index 0000000..f17217e
--- /dev/null
+++ b/vm/analysis/BackwardFlow.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Backward flow analysis.
+ */
+#ifndef _DALVIK_BACKWARDFLOW
+#define _DALVIK_BACKWARDFLOW
+
+struct VerifierData;
+
+bool dvmComputeLiveness(struct VerifierData* vdata);
+
+#endif /*_DALVIK_BACKWARDFLOW*/
diff --git a/vm/analysis/CodeVerify.c b/vm/analysis/CodeVerify.c
index f817a0e..545fa49 100644
--- a/vm/analysis/CodeVerify.c
+++ b/vm/analysis/CodeVerify.c
@@ -22,6 +22,7 @@
  * some string-peeling and wouldn't need to compute hashes.
  */
 #include "Dalvik.h"
+#include "analysis/BackwardFlow.h"
 #include "analysis/CodeVerify.h"
 #include "analysis/Optimize.h"
 #include "analysis/RegisterMap.h"
@@ -57,22 +58,8 @@
 
 static bool gDebugVerbose = false;
 
-
-/*
- * Selectively enable verbose debug logging -- use this to activate
- * dumpRegTypes() calls for all instructions in the specified method.
- */
-static inline bool doVerboseLogging(const Method* meth) {
-    return false;       /* COMMENT OUT to enable verbose debugging */
-
-    const char* cd = "Lcom/android/bluetooth/opp/BluetoothOppService;";
-    const char* mn = "scanFile";
-    const char* sg = "(Landroid/database/Cursor;I)Z";
-    return (strcmp(meth->clazz->descriptor, cd) == 0 &&
-            dvmCompareNameDescriptorAndMethod(mn, sg, meth) == 0);
-}
-
-#define SHOW_REG_DETAILS    (0 /*| DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS*/)
+#define SHOW_REG_DETAILS \
+    (0 | DRT_SHOW_LIVENESS /*| DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS*/)
 
 /*
  * We need an extra "pseudo register" to hold the return type briefly.  It
@@ -125,13 +112,12 @@
     const DecodedInstruction* pDecInsn, VerifyError* pFailure);
 static void verifyRegisterType(const RegisterLine* registerLine, \
     u4 vsrc, RegType checkType, VerifyError* pFailure);
-static bool doCodeVerification(const Method* meth, InsnFlags* insnFlags,\
-    RegisterTable* regTable, UninitInstanceMap* uninitMap);
+static bool doCodeVerification(VerifierData* vdata, RegisterTable* regTable);
 static bool verifyInstruction(const Method* meth, InsnFlags* insnFlags,\
     RegisterTable* regTable, int insnIdx, UninitInstanceMap* uninitMap,
     int* pStartGuess);
 static ClassObject* findCommonSuperclass(ClassObject* c1, ClassObject* c2);
-static void dumpRegTypes(const Method* meth, const InsnFlags* insnFlags,\
+static void dumpRegTypes(const VerifierData* vdata, \
     const RegisterLine* registerLine, int addr, const char* addrName,
     const UninitInstanceMap* uninitMap, int displayFlags);
 
@@ -140,6 +126,7 @@
     DRT_SIMPLE          = 0,
     DRT_SHOW_REF_TYPES  = 0x01,
     DRT_SHOW_LOCALS     = 0x02,
+    DRT_SHOW_LIVENESS   = 0x04,
 };
 
 
@@ -2554,8 +2541,8 @@
     } else {
         if (gDebugVerbose) {
             LOGVV("MERGE into 0x%04x\n", nextInsn);
-            //dumpRegTypes(meth, insnFlags, targetRegs, 0, "targ", NULL, 0);
-            //dumpRegTypes(meth, insnFlags, workRegs, 0, "work", NULL, 0);
+            //dumpRegTypes(vdata, targetRegs, 0, "targ", NULL, 0);
+            //dumpRegTypes(vdata, workRegs, 0, "work", NULL, 0);
         }
         /* merge registers, set Changed only if different */
         RegisterLine* targetLine = getRegisterLine(regTable, nextInsn);
@@ -2599,7 +2586,7 @@
 
         if (gDebugVerbose) {
             //LOGI(" RESULT (changed=%d)\n", changed);
-            //dumpRegTypes(meth, insnFlags, targetRegs, 0, "rslt", NULL, 0);
+            //dumpRegTypes(vdata, targetRegs, 0, "rslt", NULL, 0);
         }
 #ifdef VERIFIER_STATS
         gDvm.verifierStats.mergeRegCount++;
@@ -2966,8 +2953,8 @@
  * what's in which register, but for verification purposes we only need to
  * store it at branch target addresses (because we merge into that).
  *
- * By zeroing out the storage we are effectively initializing the register
- * information to kRegTypeUnknown.
+ * By zeroing out the regType storage we are effectively initializing the
+ * register information to kRegTypeUnknown.
  *
  * We jump through some hoops here to minimize the total number of
  * allocations we have to perform per method verified.
@@ -3061,6 +3048,9 @@
 
     /*
      * Populate the sparse register line table.
+     *
+     * There is a RegisterLine associated with every address, but not
+     * every RegisterLine has non-NULL pointers to storage for its fields.
      */
     u1* storage = (u1*)regTable->lineAlloc;
     for (i = 0; i < insnsSize; i++) {
@@ -3106,6 +3096,24 @@
 }
 
 /*
+ * Free up any "hairy" structures associated with register lines.
+ */
+static void freeRegisterLineInnards(VerifierData* vdata)
+{
+    unsigned int idx;
+
+    if (vdata->registerLines == NULL)
+        return;
+
+    for (idx = 0; idx < vdata->insnsSize; idx++) {
+        BitVector* liveRegs = vdata->registerLines[idx].liveRegs;
+        if (liveRegs != NULL)
+            dvmFreeBitVector(liveRegs);
+    }
+}
+
+
+/*
  * Verify that the arguments in a filled-new-array instruction are valid.
  *
  * "resClass" is the class refered to by pDecInsn->vB.
@@ -3432,14 +3440,33 @@
             generateRegisterMap ? kTrackRegsGcPoints : kTrackRegsBranches))
         goto bail;
 
-    vdata->registerLines = NULL;     /* don't set this until we need it */
+    vdata->registerLines = regTable.registerLines;
 
     /*
-     * Compute basic blocks if we want to do liveness analysis.
+     * Perform liveness analysis.
+     *
+     * We can do this before or after the main verifier pass.  The choice
+     * affects whether or not we see the effects of verifier instruction
+     * changes, i.e. substitution of throw-verification-error.
+     *
+     * In practice the ordering doesn't really matter, because T-V-E
+     * just prunes "can continue", creating regions of dead code (with
+     * corresponding register map data that will never be used).
      */
-    if (gDvm.registerMapMode == kRegisterMapModeLivePrecise) {
+    if (generateRegisterMap &&
+        gDvm.registerMapMode == kRegisterMapModeLivePrecise)
+    {
+        /*
+         * Compute basic blocks and predecessor lists.
+         */
         if (!dvmComputeVfyBasicBlocks(vdata))
             goto bail;
+
+        /*
+         * Compute liveness.
+         */
+        if (!dvmComputeLiveness(vdata))
+            goto bail;
     }
 
     /*
@@ -3453,16 +3480,13 @@
     /*
      * Run the verifier.
      */
-    if (!doCodeVerification(meth, vdata->insnFlags, &regTable,
-            vdata->uninitMap))
+    if (!doCodeVerification(vdata, &regTable))
         goto bail;
 
     /*
      * Generate a register map.
      */
     if (generateRegisterMap) {
-        vdata->registerLines = regTable.registerLines;
-
         RegisterMap* pMap = dvmGenerateRegisterMapV(vdata);
         if (pMap != NULL) {
             /*
@@ -3480,6 +3504,7 @@
     result = true;
 
 bail:
+    freeRegisterLineInnards(vdata);
     free(regTable.registerLines);
     free(regTable.lineAlloc);
     return result;
@@ -3535,9 +3560,11 @@
  * instruction if a register contains an uninitialized instance created
  * by that same instrutcion.
  */
-static bool doCodeVerification(const Method* meth, InsnFlags* insnFlags,
-    RegisterTable* regTable, UninitInstanceMap* uninitMap)
+static bool doCodeVerification(VerifierData* vdata, RegisterTable* regTable)
 {
+    const Method* meth = vdata->method;
+    InsnFlags* insnFlags = vdata->insnFlags;
+    UninitInstanceMap* uninitMap = vdata->uninitMap;
     const int insnsSize = dvmGetMethodInsnsSize(meth);
     bool result = false;
     bool debugVerbose = false;
@@ -3548,7 +3575,7 @@
      */
     dvmInsnSetChanged(insnFlags, 0, true);
 
-    if (doVerboseLogging(meth)) {
+    if (dvmWantVerboseVerification(meth)) {
         IF_LOGI() {
             char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
             LOGI("Now verifying: %s.%s %s (ins=%d regs=%d)\n",
@@ -3618,15 +3645,15 @@
                 LOG_VFY("HUH? workLine diverged in %s.%s %s\n",
                         meth->clazz->descriptor, meth->name, desc);
                 free(desc);
-                dumpRegTypes(meth, insnFlags, registerLine, 0, "work",
+                dumpRegTypes(vdata, registerLine, 0, "work",
                     uninitMap, DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS);
-                dumpRegTypes(meth, insnFlags, registerLine, 0, "insn",
+                dumpRegTypes(vdata, registerLine, 0, "insn",
                     uninitMap, DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS);
             }
 #endif
         }
         if (debugVerbose) {
-            dumpRegTypes(meth, insnFlags, &regTable->workLine, insnIdx,
+            dumpRegTypes(vdata, &regTable->workLine, insnIdx,
                 NULL, uninitMap, SHOW_REG_DETAILS);
         }
 
@@ -5920,10 +5947,12 @@
 /*
  * Dump the register types for the specifed address to the log file.
  */
-static void dumpRegTypes(const Method* meth, const InsnFlags* insnFlags,
+static void dumpRegTypes(const VerifierData* vdata,
     const RegisterLine* registerLine, int addr, const char* addrName,
     const UninitInstanceMap* uninitMap, int displayFlags)
 {
+    const Method* meth = vdata->method;
+    const InsnFlags* insnFlags = vdata->insnFlags;
     const RegType* addrRegs = registerLine->regTypes;
     int regCount = meth->registersSize;
     int fullRegCount = regCount + kExtraRegs;
@@ -5988,6 +6017,26 @@
         LOGI("%c0x%04x %s mst=%d\n", branchTarget ? '>' : ' ',
             addr, regChars, registerLine->monitorStackTop);
     }
+    if (displayFlags & DRT_SHOW_LIVENESS) {
+        /*
+         * We can't use registerLine->liveRegs because it might be the
+         * "work line" rather than the copy from RegisterTable.
+         */
+        BitVector* liveRegs = vdata->registerLines[addr].liveRegs;
+        if (liveRegs != NULL)  {
+            char liveChars[regCharSize + 1];
+            memset(liveChars, ' ', regCharSize);
+            liveChars[regCharSize] = '\0';
+
+            for (i = 0; i < regCount; i++) {
+                bool isLive = dvmIsBitSet(liveRegs, i);
+                liveChars[i + 1 + (i / 4)] = isLive ? '+' : '-';
+            }
+            LOGI("        %s\n", liveChars);
+        } else {
+            LOGI("        %c\n", '#');
+        }
+    }
 
     if (displayFlags & DRT_SHOW_REF_TYPES) {
         for (i = 0; i < regCount + kExtraRegs; i++) {
diff --git a/vm/analysis/CodeVerify.h b/vm/analysis/CodeVerify.h
index b9147ef..9369537 100644
--- a/vm/analysis/CodeVerify.h
+++ b/vm/analysis/CodeVerify.h
@@ -120,12 +120,18 @@
  * instruction.  We track the status of all registers, and (if the method
  * has any monitor-enter instructions) maintain a stack of entered monitors
  * (identified by code unit offset).
+ *
+ * If live-precise register maps are enabled, the "liveRegs" vector will
+ * be populated.  Unlike the other lists of registers here, we do not
+ * track the liveness of the method result register (which is not visible
+ * to the GC).
  */
 typedef struct {
     RegType*        regTypes;
     MonitorEntries* monitorEntries;
     u4*             monitorStack;
     unsigned int    monitorStackTop;
+    BitVector*      liveRegs;
 } RegisterLine;
 
 /*
diff --git a/vm/analysis/VerifySubs.c b/vm/analysis/VerifySubs.c
index cbe8af7..2366f68 100644
--- a/vm/analysis/VerifySubs.c
+++ b/vm/analysis/VerifySubs.c
@@ -23,6 +23,21 @@
 
 
 /*
+ * This is used when debugging to apply a magnifying glass to the
+ * verification of a particular method.
+ */
+bool dvmWantVerboseVerification(const Method* meth)
+{
+    return false;       /* COMMENT OUT to enable verbose debugging */
+
+    const char* cd = "Lcom/android/server/am/ActivityManagerService;";
+    const char* mn = "trimApplications";
+    const char* sg = "()V";
+    return (strcmp(meth->clazz->descriptor, cd) == 0 &&
+            dvmCompareNameDescriptorAndMethod(mn, sg, meth) == 0);
+}
+
+/*
  * Output a code verifier warning message.  For the pre-verifier it's not
  * a big deal if something fails (and it may even be expected), but if
  * we're doing just-in-time verification it's significant.
diff --git a/vm/analysis/VerifySubs.h b/vm/analysis/VerifySubs.h
index 9449e64..625244d 100644
--- a/vm/analysis/VerifySubs.h
+++ b/vm/analysis/VerifySubs.h
@@ -68,4 +68,7 @@
 /* return a RegType enumeration value that "value" just fits into */
 char dvmDetermineCat1Const(s4 value);
 
+/* debugging */
+bool dvmWantVerboseVerification(const Method* meth);
+
 #endif /*_DALVIK_VERIFYSUBS*/
diff --git a/vm/analysis/VfyBasicBlock.c b/vm/analysis/VfyBasicBlock.c
index 59d395e..7500ba0 100644
--- a/vm/analysis/VfyBasicBlock.c
+++ b/vm/analysis/VfyBasicBlock.c
@@ -75,7 +75,15 @@
     if (newBlock == NULL)
         return NULL;
 
-    newBlock->predecessors = dvmPointerSetAlloc(8);
+    /*
+     * TODO: there is no good default size here -- the problem is that most
+     * addresses will only have one predecessor, but a fair number will
+     * have 10+, and a few will have 100+ (e.g. the synthetic "finally"
+     * in a large synchronized method).  We probably want to use a small
+     * base allocation (perhaps two) and then have the first overflow
+     * allocation jump dramatically (to 32 or thereabouts).
+     */
+    newBlock->predecessors = dvmPointerSetAlloc(32);
     if (newBlock->predecessors == NULL) {
         free(newBlock);
         return NULL;
@@ -83,6 +91,13 @@
 
     newBlock->firstAddr = (u4) -1;      // DEBUG
 
+    newBlock->liveRegs = dvmAllocBitVector(vdata->insnRegCount, false);
+    if (newBlock->liveRegs == NULL) {
+        dvmPointerSetFree(newBlock->predecessors);
+        free(newBlock);
+        return NULL;
+    }
+
     return newBlock;
 }
 
@@ -211,6 +226,13 @@
         }
     }
 
+    if (false) {
+        if (dvmPointerSetGetCount(curBlock->predecessors) > 256) {
+            LOGI("Lots of preds at 0x%04x in %s.%s:%s\n", curIdx,
+                meth->clazz->descriptor, meth->name, meth->shorty);
+        }
+    }
+
     return true;
 }
 
@@ -235,14 +257,14 @@
             block->firstAddr, block->lastAddr);
 
         PointerSet* preds = block->predecessors;
-        size_t numDeps = dvmPointerSetGetCount(preds);
+        size_t numPreds = dvmPointerSetGetCount(preds);
 
-        if (numDeps > 0) {
+        if (numPreds > 0) {
             count += snprintf(printBuf + count, sizeof(printBuf) - count,
                     "preds:");
 
             unsigned int predIdx;
-            for (predIdx = 0; predIdx < numDeps; predIdx++) {
+            for (predIdx = 0; predIdx < numPreds; predIdx++) {
                 if (count >= (int) sizeof(printBuf))
                     break;
                 const VfyBasicBlock* pred =
@@ -255,8 +277,13 @@
                     "(no preds)");
         }
 
+        printBuf[sizeof(printBuf)-2] = '!';
+        printBuf[sizeof(printBuf)-1] = '\0';
+
         LOGI("%s", printBuf);
     }
+
+    usleep(100 * 1000);      /* ugh...let logcat catch up */
 }
 
 
@@ -280,15 +307,7 @@
     u4 idx, blockStartAddr;
     bool result = false;
 
-    bool verbose = false;
-    if (false) {
-        if (strcmp(meth->clazz->descriptor, "Ljava/security/Provider;") == 0 &&
-            strcmp(meth->name, "entrySet") == 0)
-        {
-            verbose = true;
-        }
-    }
-
+    bool verbose = false; //dvmWantVerboseVerification(meth);
     if (verbose) {
         LOGI("Basic blocks for %s.%s:%s\n",
             meth->clazz->descriptor, meth->name, meth->shorty);
diff --git a/vm/analysis/VfyBasicBlock.h b/vm/analysis/VfyBasicBlock.h
index c2d6618..ff2c680 100644
--- a/vm/analysis/VfyBasicBlock.h
+++ b/vm/analysis/VfyBasicBlock.h
@@ -21,7 +21,6 @@
 #ifndef _DALVIK_VFYBASICBLOCK
 #define _DALVIK_VFYBASICBLOCK
 
-#include "libdex/InstrUtils.h"
 #include "PointerSet.h"
 
 struct VerifierData;
@@ -32,11 +31,21 @@
  *
  * This is used for liveness analysis, which is a reverse-flow algorithm,
  * so we need to mantain a list of predecessors for each block.
+ *
+ * "liveRegs" indicates the set of registers that are live at the end of
+ * the basic block (after the last instruction has executed).  Successor
+ * blocks will compare their results with this to see if this block needs
+ * to be re-evaluated.  Note that this is not the same as the contents of
+ * the RegisterLine for the last instruction in the block (which reflects
+ * the state *before* the instruction has executed).
  */
 typedef struct {
     u4              firstAddr;      /* address of first instruction */
     u4              lastAddr;       /* address of last instruction */
     PointerSet*     predecessors;   /* set of basic blocks that can flow here */
+    BitVector*      liveRegs;       /* liveness for each register */
+    bool            changed;        /* input set has changed, must re-eval */
+    bool            visited;        /* block has been visited at least once */
 } VfyBasicBlock;
 
 /*
diff --git a/vm/compiler/CompilerUtility.h b/vm/compiler/CompilerUtility.h
index 3e65a2e..5dd1faf 100644
--- a/vm/compiler/CompilerUtility.h
+++ b/vm/compiler/CompilerUtility.h
@@ -63,9 +63,9 @@
 intptr_t dvmGrowableListIteratorNext(GrowableListIterator *iterator);
 intptr_t dvmGrowableListGetElement(const GrowableList *gList, size_t idx);
 
-BitVector* dvmCompilerAllocBitVector(int startBits, bool expandable);
-bool dvmCompilerSetBit(BitVector* pBits, int num);
-bool dvmCompilerClearBit(BitVector* pBits, int num);
+BitVector* dvmCompilerAllocBitVector(unsigned int startBits, bool expandable);
+bool dvmCompilerSetBit(BitVector* pBits, unsigned int num);
+bool dvmCompilerClearBit(BitVector* pBits, unsigned int num);
 void dvmCompilerMarkAllBits(BitVector *pBits, bool set);
 void dvmDebugBitVector(char *msg, const BitVector *bv, int length);
 void dvmDumpLIRInsn(struct LIR *lir, unsigned char *baseAddr);
diff --git a/vm/compiler/MethodSSATransformation.c b/vm/compiler/MethodSSATransformation.c
index 48d5b5c..60eea33 100644
--- a/vm/compiler/MethodSSATransformation.c
+++ b/vm/compiler/MethodSSATransformation.c
@@ -346,9 +346,9 @@
         dvmAbort();
     }
 
-    int i;
-    for (i = 0; i < dest->storageSize; i++) {
-        dest->storage[i] |= src1->storage[i] & ~src2->storage[i];
+    unsigned int idx;
+    for (idx = 0; idx < dest->storageSize; idx++) {
+        dest->storage[idx] |= src1->storage[idx] & ~src2->storage[idx];
     }
 }
 
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
index fe25833..7be57ef 100644
--- a/vm/compiler/Utility.c
+++ b/vm/compiler/Utility.c
@@ -269,13 +269,12 @@
  * NOTE: this is the sister implementation of dvmAllocBitVector. In this version
  * memory is allocated from the compiler arena.
  */
-BitVector* dvmCompilerAllocBitVector(int startBits, bool expandable)
+BitVector* dvmCompilerAllocBitVector(unsigned int startBits, bool expandable)
 {
     BitVector* bv;
-    int count;
+    unsigned int count;
 
     assert(sizeof(bv->storage[0]) == 4);        /* assuming 32-bit units */
-    assert(startBits >= 0);
 
     bv = (BitVector*) dvmCompilerNew(sizeof(BitVector), false);
 
@@ -296,15 +295,14 @@
  * NOTE: this is the sister implementation of dvmSetBit. In this version
  * memory is allocated from the compiler arena.
  */
-bool dvmCompilerSetBit(BitVector *pBits, int num)
+bool dvmCompilerSetBit(BitVector *pBits, unsigned int num)
 {
-    assert(num >= 0);
-    if (num >= pBits->storageSize * (int)sizeof(u4) * 8) {
+    if (num >= pBits->storageSize * sizeof(u4) * 8) {
         if (!pBits->expandable)
             dvmAbort();
 
         /* Round up to word boundaries for "num+1" bits */
-        int newSize = (num + 1 + 31) >> 5;
+        unsigned int newSize = (num + 1 + 31) >> 5;
         assert(newSize > pBits->storageSize);
         u4 *newStorage = (u4*)dvmCompilerNew(newSize * sizeof(u4), false);
         memcpy(newStorage, pBits->storage, pBits->storageSize * sizeof(u4));
@@ -327,10 +325,9 @@
  * NOTE: this is the sister implementation of dvmClearBit. In this version
  * memory is allocated from the compiler arena.
  */
-bool dvmCompilerClearBit(BitVector *pBits, int num)
+bool dvmCompilerClearBit(BitVector *pBits, unsigned int num)
 {
-    assert(num >= 0);
-    if (num >= pBits->storageSize * (int)sizeof(u4) * 8) {
+    if (num >= pBits->storageSize * sizeof(u4) * 8) {
         LOGE("Trying to clear a bit that is not set in the vector yet!");
         dvmAbort();
     }