Codegen support for debugger

Add the ability to generate code with support for debugging.
This involves generating a callout to an equivalent of the
old "updateDebugger()" before each Dalvik opcode, method
entry and method exit.

The added code is fairly compact - 8 bytes per Dalvik opcode,
plus 4 additional bytes per safe point.

I dislike the idea of always making this call, so I'm reusing
the dedicated register rSUSPEND to hold the address of the callout.
rSUSPEND is normally used to reduce the frequency of full suspend
checks, but when debugging this isn't necessary - allowing
us to reuse this register to hold the address of the callout.
If it is non-null we make the callout, otherwise we continue.  I refresh
this register from a slot in the Thread structure on method entry
and also following the return of taken suspend checks.  In this way,
the debugger has the ability to control updates on a per-thread basis,
and the performance penalty is greatly reduced for threads that don't
have any pending debugger requests.  Once the debugger attaches,
it would suspend all threads, walk through the thread list, set
thread->pUpdateDebuggerFromCode to art_update_debugger and then
turn everything loose.

One thing I'm not doing, though, is debugger updates before and after
calls to native methods.  This is something that will have to be done
by the stubs, because I don't know which invokes are native.  Oh, and
there will also need to be an artUpdateDebugger call on the exception
path.

I'm passing the DalvikPC to the stub, and am using special
codes (-1 and -2) to denote method entry and exit.  The stub
recovers the current Method* and Thread* and passes them on to
artUpdateDebugger().

When we're compiling in this special mode, all optimizations which
might result in code motion or suppressed load/store of a Dalvik
register are turned off.  No register promotion is done, so everything
will be in its home location.

Change-Id: Iaf66f4d0d094a1699269d0a1ad1ed33e7613aef8
diff --git a/src/asm_support.h b/src/asm_support.h
index 63b8f3c..d3e18bc 100644
--- a/src/asm_support.h
+++ b/src/asm_support.h
@@ -24,13 +24,13 @@
 #define rSELF r9
 #define rLR r14
 // Offset of field Thread::suspend_count_ verified in InitCpu
-#define THREAD_SUSPEND_COUNT_OFFSET 420
+#define THREAD_SUSPEND_COUNT_OFFSET 424
 // Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 416
+#define THREAD_EXCEPTION_OFFSET 420
 
 #elif defined(__i386__)
 // Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 408
+#define THREAD_SELF_OFFSET 412
 #endif
 
 #endif  // ART_SRC_ASM_SUPPORT_H_
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index c09da33..451fae5 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -24,6 +24,14 @@
 #define COMPILER_TRACED(X)
 #define COMPILER_TRACEE(X)
 
+/*
+ * Special offsets to denote method entry/exit for debugger update.
+ * NOTE: bit pattern must be loadable using 1 instruction and must
+ * not be a valid Dalvik offset.
+ */
+#define DEBUGGER_METHOD_ENTRY -1
+#define DEBUGGER_METHOD_EXIT -2
+
 typedef enum OatInstructionSetType {
     DALVIK_OAT_NONE = 0,
     DALVIK_OAT_ARM,
@@ -39,6 +47,7 @@
     kPromoteRegs,
     kTrackLiveTemps,
     kSkipLargeMethodOptimization,
+    kGenCodeForDebugger,
 };
 
 /* Type of allocation for memory tuning */
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index d04b97c..ade478f 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -238,6 +238,7 @@
     std::vector<uint32_t> mappingTable;
     std::vector<uint16_t> coreVmapTable;
     std::vector<uint16_t> fpVmapTable;
+    bool genDebugger;                   // Generate code for debugger
     bool printMe;
     bool hasClassLiterals;              // Contains class ptrs used as literals
     bool hasLoop;                       // Contains a loop
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 38d6972..873567c 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -33,6 +33,7 @@
      //(1 << kPromoteRegs) |
      //(1 << kTrackLiveTemps) |
      //(1 << kSkipLargeMethodOptimization) |
+     //(1 << kGenCodeForDebugger) |
      0;
 
 uint32_t compilerDebugFlags = 0 |     // Enable debug/testing modes
@@ -785,6 +786,18 @@
         cUnit->printMe = VLOG_IS_ON(compiler) || (cUnit->enableDebug & (1 << kDebugVerbose));
     }
 
+    /* Are we generating code for the debugger? */
+    if (cUnit->disableOpt & (1 << kGenCodeForDebugger)) {
+        cUnit->genDebugger = true;
+        // Yes, disable most optimizations
+        cUnit->disableOpt |= (
+            (1 << kLoadStoreElimination) |
+            (1 << kLoadHoisting) |
+            (1 << kSuppressLoads) |
+            (1 << kPromoteRegs) |
+            (1 << kTrackLiveTemps));
+    }
+
     /* Assume non-throwing leaf */
     cUnit->attrs = (METHOD_IS_LEAF | METHOD_IS_THROW_FREE);
 
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index bc536f9..510a5ea 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -33,7 +33,7 @@
  *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
  *        registers.
  * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4   : (rSUSPEND) is reserved (suspend check assist)
+ * r4   : (rSUSPEND) is reserved (suspend check/debugger assist)
  * r5   : Callee save (promotion target)
  * r6   : Callee save (promotion target)
  * r7   : Callee save (promotion target)
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index c512e8b..e4073d4 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1933,12 +1933,24 @@
         }
         storeBaseDisp(cUnit, rSP, 0, r0, kWord);
         flushIns(cUnit);
+
+        if (cUnit->genDebugger) {
+            // Refresh update debugger callout
+            loadWordDisp(cUnit, rSELF,
+                         OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND);
+            genDebuggerUpdate(cUnit, DEBUGGER_METHOD_ENTRY);
+        }
+
         oatFreeTemp(cUnit, r0);
         oatFreeTemp(cUnit, r1);
         oatFreeTemp(cUnit, r2);
         oatFreeTemp(cUnit, r3);
     } else if (bb->blockType == kExitBlock) {
         newLIR0(cUnit, kArmPseudoMethodExit);
+        /* If we're compiling for the debugger, generate an update callout */
+        if (cUnit->genDebugger) {
+            genDebuggerUpdate(cUnit, DEBUGGER_METHOD_EXIT);
+        }
         opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (spillCount * 4));
         /* Need to restore any FP callee saves? */
         if (cUnit->numFPSpills) {
@@ -1994,6 +2006,11 @@
             headLIR->defMask = ENCODE_ALL;
         }
 
+        /* If we're compiling for the debugger, generate an update callout */
+        if (cUnit->genDebugger) {
+            genDebuggerUpdate(cUnit, mir->offset);
+        }
+
         /* Don't generate the SSA annotation unless verbose mode is on */
         if (cUnit->printMe && mir->ssaRep) {
             char* ssaString = oatGetSSAString(cUnit, mir->ssaRep);
@@ -2087,9 +2104,17 @@
         oatAppendLIR(cUnit, (LIR *)lab);
         loadWordDisp(cUnit, rSELF,
                      OFFSETOF_MEMBER(Thread, pTestSuspendFromCode), rLR);
-        loadWordDisp(cUnit, rSELF,
-            Thread::SuspendCountOffset().Int32Value(), rSUSPEND);
+        if (!cUnit->genDebugger) {
+            // use rSUSPEND for suspend count
+            loadWordDisp(cUnit, rSELF,
+                         Thread::SuspendCountOffset().Int32Value(), rSUSPEND);
+        }
         opReg(cUnit, kOpBlx, rLR);
+        if ( cUnit->genDebugger) {
+            // use rSUSPEND for update debugger
+            loadWordDisp(cUnit, rSELF,
+                         OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND);
+        }
         genUnconditionalBranch(cUnit, resumeLab);
     }
 }
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 2a3b807..db51fb8 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -574,6 +574,29 @@
     }
 }
 
+/*
+ * Generate a conditional callout to the debugger-update stub whose address
+ * is held in rSUSPEND (skipped when rSUSPEND is 0).  "offset" is the Dalvik
+ * offset of the current opcode, or DEBUGGER_METHOD_ENTRY/_EXIT.  Note: genIT
+ * will automatically create a scheduling barrier, which we need to prevent
+ * code motion that might confuse the debugger.  Note: temp registers are
+ * typically not live between opcodes, but r0/r1 are kept live following
+ * invokes, where they are consumed by the immediately following
+ * op_move_result_xxx.  Preserving/restoring r0/r1 is handled by the stub.
+ */
+STATIC void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset)
+{
+    // Verify offset loads with a single instruction (special code or 16 bits)
+    DCHECK((offset == DEBUGGER_METHOD_ENTRY) ||
+           (offset == DEBUGGER_METHOD_EXIT) || ((offset & 0xffff) == offset));
+    oatClobberCalleeSave(cUnit);
+    opRegImm(cUnit, kOpCmp, rSUSPEND, 0);   // is a callout installed?
+    genIT(cUnit, kArmCondNe, "T");          // make load + blx below conditional on NE
+    loadConstant(cUnit, r2, offset);     // arg2 <- Dalvik offset or entry/exit code
+    opReg(cUnit, kOpBlx, rSUSPEND);      // call the stub through rSUSPEND
+    oatFreeTemp(cUnit, r2);
+}
+
 STATIC void genConstString(CompilationUnit* cUnit, MIR* mir,
                            RegLocation rlDest, RegLocation rlSrc)
 {
@@ -875,7 +898,8 @@
     oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
     // Keep special registers from being allocated
     for (int i = 0; i < numReserved; i++) {
-        if (NO_SUSPEND && (reservedRegs[i] == rSUSPEND)) {
+        if (NO_SUSPEND && !cUnit->genDebugger &&
+            (reservedRegs[i] == rSUSPEND)) {
             //To measure cost of suspend check
             continue;
         }
@@ -1738,8 +1762,15 @@
         return;
     }
     oatFlushAllRegs(cUnit);
-    newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
-    ArmLIR* branch = opCondBranch(cUnit, kArmCondEq);
+    ArmLIR* branch;
+    if (cUnit->genDebugger) {
+        // If generating code for the debugger, always check for suspension
+        branch = genUnconditionalBranch(cUnit, NULL);
+    } else {
+        // In non-debug case, only check periodically
+        newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
+        branch = opCondBranch(cUnit, kArmCondEq);
+    }
     ArmLIR* retLab = newLIR0(cUnit, kArmPseudoTargetLabel);
     retLab->defMask = ENCODE_ALL;
     ArmLIR* target = (ArmLIR*)oatNew(cUnit, sizeof(ArmLIR), true, kAllocLIR);
diff --git a/src/runtime_support.cc b/src/runtime_support.cc
index b68f7ac..7c47a8f 100644
--- a/src/runtime_support.cc
+++ b/src/runtime_support.cc
@@ -34,6 +34,17 @@
   self->SetTopOfStack(sp, 0);
 }
 
+/*
+ * Report location to debugger.  Note: dalvikPC is the current offset within
+ * the method.  However, because the offset alone cannot distinguish between
+ * method entry and offset 0 within the method, an offset of -1 denotes
+ * method entry; likewise -2 denotes method exit (see DEBUGGER_METHOD_*).
+ */
+extern "C" void artUpdateDebuggerFromCode(int32_t dalvikPC, Thread* self, Method** sp) {
+  FinishCalleeSaveFrameSetup(self, sp,  Runtime::kRefsAndArgs);
+    // TODO: fill this out similar to old "updateDebugger"
+}
+
 // Temporary debugging hook for compiler.
 extern void DebugMe(Method* method, uint32_t info) {
   LOG(INFO) << "DebugMe";
diff --git a/src/runtime_support.h b/src/runtime_support.h
index e5097e5..b9c03e4 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -28,6 +28,7 @@
 extern Array* CheckAndAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count,
                                          Thread* self, bool access_check);
 extern void DebugMe(Method* method, uint32_t info);
+extern void UpdateDebuggerFromCode(Method* method, Thread* thread , int32_t dalvikPC, Method** sp);
 extern Object* DecodeJObjectInThread(Thread* thread, jobject obj);
 extern Field* FindFieldFromCode(uint32_t field_idx, const Method* referrer, Thread* self,
                                 bool is_static, bool is_primitive, size_t expected_size);
@@ -93,6 +94,7 @@
   extern "C" void art_trace_exit_from_code();
   extern "C" void* art_resolve_string_from_code(void*, uint32_t);
   extern "C" void* art_resolve_method_from_code(void* referrer, uint32_t method_idx, bool is_direct);
+  extern "C" void art_update_debugger(void*, void*, int32_t, void*);
 
   /* Conversions */
   extern "C" float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
diff --git a/src/runtime_support_arm.S b/src/runtime_support_arm.S
index d406436..c45583a 100644
--- a/src/runtime_support_arm.S
+++ b/src/runtime_support_arm.S
@@ -64,6 +64,22 @@
     b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*, SP)
 .endm
 
+    .global art_update_debugger
+    .extern artUpdateDebuggerFromCode
+    /*
+     * On entry, r0 and r1 must be preserved, r2 is DalvikPC
+     */
+art_update_debugger:
+    mov    r3, r0         @ stash away r0 so that it's saved as if it were an argument
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    mov    r0, r2         @ arg0 is DalvikPC
+    mov    r1, rSELF      @ arg1 is Thread*
+    mov    r2, sp         @ arg2 is sp
+    bl     artUpdateDebuggerFromCode      @ artUpdateDebuggerFromCode(dPC, Thread*, sp)
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    mov    r0, r3         @ restore original r0
+    bx     lr
+
     .global art_do_long_jump
     /*
      * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
diff --git a/src/thread.cc b/src/thread.cc
index 6be7f7d..582832d 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -135,6 +135,7 @@
   pThrowVerificationErrorFromCode = art_throw_verification_error_from_code;
   pUnlockObjectFromCode = art_unlock_object_from_code;
   pResolveMethodFromCode = art_resolve_method_from_code;
+  pUpdateDebuggerFromCode = NULL;  // To enable, set to art_update_debugger
 #endif
   pF2l = F2L;
   pD2l = D2L;
diff --git a/src/thread.h b/src/thread.h
index a6fa0ca..b1b8a1e 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -166,6 +166,7 @@
   void (*pUnlockObjectFromCode)(void*);
   void* (*pUnresolvedDirectMethodTrampolineFromCode)(int32_t, Method**, Thread*,
                                                      Runtime::TrampolineType);
+  void (*pUpdateDebuggerFromCode)(void*, void*, int32_t, void*);
 
   class StackVisitor {
    public: