Invoke support for Quick Compiler

Fleshed out invoke and const-string support.  Fixed a bug in Phi node
insertion.

With this CL, the "Recursive Fibonacci" and "HelloWorld" milestones are
met.

Added a set of "HL" (for High-Level) invoke intrinsics.  Until we
complete the merging of the Quick & Iceland runtime models, the invoke
code sequences are slightly different.  Thus, the Greenland IR needs
to represent invokes at a somewhat higher level than Iceland.  The
test for fast/slow path needs to happen during the lowering of the
HLInvokeXXX intrinsics in both the Quick and Portable paths.

This will generally be the case in the short term: fast/slow path
determination is pushed below the Greenland IR level.  As unification
proceeds, we'll pull as much as makes sense into the common front end.

Change-Id: I0a18edf1be18583c0afdc3f7e10a3e4691968e77
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h
index 5e8e993..da3a237 100644
--- a/src/compiler/CompilerUtility.h
+++ b/src/compiler/CompilerUtility.h
@@ -132,6 +132,7 @@
 const char* oatGetShortyFromTargetIdx(CompilationUnit*, int);
 void oatDumpRegLocTable(RegLocation*, int);
 void oatDumpMemStats(CompilationUnit* cUnit);
+void oatDumpRegLoc(RegLocation loc);
 
 }  // namespace art
 
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 70de4fc..d5e6b12 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -765,7 +765,11 @@
   cUnit->numRegs = code_item->registers_size_ - cUnit->numIns;
   cUnit->numOuts = code_item->outs_size_;
 #if defined(ART_USE_QUICK_COMPILER)
-  cUnit->genBitcode = PrettyMethod(method_idx, dex_file).find("Fibonacci.fibonacci") != std::string::npos;
+  if ((PrettyMethod(method_idx, dex_file).find("fibonacci") != std::string::npos)
+      || (PrettyMethod(method_idx, dex_file).find("HelloWorld") != std::string::npos)
+     ) {
+    cUnit->genBitcode = true;
+  }
 #endif
   /* Adjust this value accordingly once inlining is performed */
   cUnit->numDalvikRegisters = code_item->registers_size_;
@@ -782,7 +786,9 @@
         (cUnit->enableDebug & (1 << kDebugVerbose));
   }
 #if defined(ART_USE_QUICK_COMPILER)
-  if (cUnit->genBitcode) cUnit->printMe = true;
+  if (cUnit->genBitcode) {
+    cUnit->printMe = true;
+  }
 #endif
   if (cUnit->instructionSet == kX86) {
     // Disable optimizations on X86 for now
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index a3b3c50..29766e0 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -331,6 +331,10 @@
   }
 }
 
+void oatDumpRegLoc(RegLocation loc) {
+  oatDumpRegLocTable(&loc, 1);  // Single-location wrapper around the table dumper
+}
+
 static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
                                      INVALID_REG, INVALID_REG, INVALID_SREG,
                                      INVALID_SREG};
diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc
index aedf4be..6eb0415 100644
--- a/src/compiler/SSATransformation.cc
+++ b/src/compiler/SSATransformation.cc
@@ -816,9 +816,10 @@
  */
 bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb)
 {
-  ArenaBitVector* ssaRegV = cUnit->tempSSARegisterV;
   GrowableListIterator iter;
   MIR *mir;
+  std::vector<int> uses;
+  std::vector<int> incomingArc;
 
   /* Phi nodes are at the beginning of each block */
   for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
@@ -828,7 +829,8 @@
     DCHECK_GE(ssaReg, 0);   // Shouldn't see compiler temps here
     int vReg = SRegToVReg(cUnit, ssaReg);
 
-    oatClearAllBits(ssaRegV);
+    uses.clear();
+    incomingArc.clear();
 
     /* Iterate through the predecessors */
     oatGrowableListIteratorInit(bb->predecessors, &iter);
@@ -837,12 +839,12 @@
          (BasicBlock*)oatGrowableListIteratorNext(&iter);
       if (!predBB) break;
       int ssaReg = predBB->dataFlowInfo->vRegToSSAMap[vReg];
-      oatSetBit(cUnit, ssaRegV, ssaReg);
-      cUnit->tempSSABlockIdV[ssaReg] = predBB->id;
+      uses.push_back(ssaReg);
+      incomingArc.push_back(predBB->id);
     }
 
     /* Count the number of SSA registers for a Dalvik register */
-    int numUses = oatCountSetBits(ssaRegV);
+    int numUses = uses.size();
     mir->ssaRep->numUses = numUses;
     mir->ssaRep->uses =
         (int*) oatNew(cUnit, sizeof(int) * numUses, false, kAllocDFInfo);
@@ -853,17 +855,11 @@
     // TODO: Ugly, rework (but don't burden each MIR/LIR for Phi-only needs)
     mir->dalvikInsn.vB = (intptr_t) incoming;
 
-    ArenaBitVectorIterator phiIterator;
-
-    oatBitVectorIteratorInit(ssaRegV, &phiIterator);
-    int *usePtr = mir->ssaRep->uses;
-
     /* Set the uses array for the phi node */
-    while (true) {
-      int ssaRegIdx = oatBitVectorIteratorNext(&phiIterator);
-      if (ssaRegIdx == -1) break;
-        *usePtr++ = ssaRegIdx;
-        *incoming++ = cUnit->tempSSABlockIdV[ssaRegIdx];
+    int *usePtr = mir->ssaRep->uses;
+    for (int i = 0; i < numUses; i++) {
+      *usePtr++ = uses[i];
+      *incoming++ = incomingArc[i];
     }
   }
 
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index 11982ba..c189eb2 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -361,7 +361,6 @@
 /* Dump instructions and constant pool contents */
 void oatCodegenDump(CompilationUnit* cUnit)
 {
-  LOG(INFO) << "/*";
   LOG(INFO) << "Dumping LIR insns for "
             << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
   LIR* lirInsn;
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 4508416..61ea6b6 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -31,6 +31,8 @@
 void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
                          bool gtBias, bool isDouble);
 
+CallInfo* oatNewCallInfo(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                         InvokeType type, bool isRange);
 
 /* Lower middle-level IR to low-level IR for the whole method */
 void oatMethodMIR2LIR(CompilationUnit* cUnit);
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 29f3cca..a1c7645 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1083,8 +1083,8 @@
   }
 }
 
-void genConstClass(CompilationUnit* cUnit, uint32_t type_idx, RegLocation rlDest,
-                   RegLocation rlSrc)
+void genConstClass(CompilationUnit* cUnit, uint32_t type_idx,
+                   RegLocation rlDest)
 {
   RegLocation rlMethod = loadCurrMethod(cUnit);
   int resReg = oatAllocTemp(cUnit);
@@ -1145,8 +1145,8 @@
   }
 }
 
-void genConstString(CompilationUnit* cUnit, uint32_t string_idx, RegLocation rlDest,
-          RegLocation rlSrc)
+void genConstString(CompilationUnit* cUnit, uint32_t string_idx,
+                    RegLocation rlDest)
 {
   /* NOTE: Most strings should be available at compile time */
   int32_t offset_of_string = Array::DataOffset(sizeof(String*)).Int32Value() +
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index 3b6b087..f13a473 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -28,7 +28,7 @@
 #include <llvm/Instructions.h>
 #include <llvm/Support/Casting.h>
 
-const char* labelFormat = "L0x%x_d";
+const char* labelFormat = "L0x%x_%d";
 
 namespace art {
 extern const RegLocation badLoc;
@@ -193,6 +193,8 @@
   condValue->setName(StringPrintf("t%d", cUnit->tempName++));
   cUnit->irb->CreateCondBr(condValue, getLLVMBlock(cUnit, bb->taken->id),
                            getLLVMBlock(cUnit, bb->fallThrough->id));
+  // Don't redo the fallthrough branch in the BB driver
+  bb->fallThrough = NULL;
 }
 
 void convertCompareZeroAndBranch(CompilationUnit* cUnit, BasicBlock* bb,
@@ -212,6 +214,8 @@
   condValue->setName(StringPrintf("t%d", cUnit->tempName++));
   cUnit->irb->CreateCondBr(condValue, getLLVMBlock(cUnit, bb->taken->id),
                            getLLVMBlock(cUnit, bb->fallThrough->id));
+  // Don't redo the fallthrough branch in the BB driver
+  bb->fallThrough = NULL;
 }
 
 llvm::Value* genDivModOp(CompilationUnit* cUnit, bool isDiv, bool isLong,
@@ -314,6 +318,70 @@
   defineValue(cUnit, res, rlDest.origSReg);
 }
 
+/*
+ * Lower an invoke MIR to a call of the matching HLInvokeXXX intrinsic.
+ * Intrinsic operands: invokeType, method_idx, optFlags, then one value per
+ * argument (a wide argument contributes a single value).
+ */
+void convertInvoke(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                   InvokeType invokeType, bool isRange)
+{
+  CallInfo* info = oatNewCallInfo(cUnit, bb, mir, invokeType, isRange);
+  llvm::SmallVector<llvm::Value*, 10> args;
+  // Insert the invokeType
+  args.push_back(cUnit->irb->getInt32(static_cast<int>(invokeType)));
+  // Insert the method_idx
+  args.push_back(cUnit->irb->getInt32(info->index));
+  // Insert the optimization flags
+  args.push_back(cUnit->irb->getInt32(info->optFlags));
+  // Now, insert the actual arguments
+  if (cUnit->printMe) {
+    LOG(INFO) << "Building Invoke info";
+  }
+  for (int i = 0; i < info->numArgWords;) {
+    if (cUnit->printMe) {
+      oatDumpRegLoc(info->args[i]);
+    }
+    llvm::Value* val = getLLVMValue(cUnit, info->args[i].origSReg);
+    args.push_back(val);
+    i += info->args[i].wide ? 2 : 1;  // Step over the high word of wide args
+  }
+  /*
+   * Choose the invoke return type based on actual usage.  Note: may
+   * be different than shorty.  For example, if a function return value
+   * is not used, we'll treat this as a void invoke.
+   */
+  greenland::IntrinsicHelper::IntrinsicId id;
+  if (info->result.location == kLocInvalid) {
+    id = greenland::IntrinsicHelper::HLInvokeVoid;
+  } else if (info->result.wide) {
+    id = info->result.fp ? greenland::IntrinsicHelper::HLInvokeDouble
+                         : greenland::IntrinsicHelper::HLInvokeLong;
+  } else if (info->result.ref) {
+    id = greenland::IntrinsicHelper::HLInvokeObj;
+  } else if (info->result.fp) {
+    id = greenland::IntrinsicHelper::HLInvokeFloat;
+  } else {
+    id = greenland::IntrinsicHelper::HLInvokeInt;
+  }
+  llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(id);
+  llvm::Value* res = cUnit->irb->CreateCall(intr, args);
+  if (info->result.location != kLocInvalid) {
+    defineValue(cUnit, res, info->result.origSReg);
+  }
+}
+
+// Emit a ConstString intrinsic call for string_idx and bind it to rlDest.
+void convertConstString(CompilationUnit* cUnit, BasicBlock* bb,
+                        uint32_t string_idx, RegLocation rlDest)
+{
+  llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(
+      greenland::IntrinsicHelper::ConstString);
+  llvm::Value* index = cUnit->irb->getInt32(string_idx);
+  llvm::Value* res = cUnit->irb->CreateCall(intr, index);
+  defineValue(cUnit, res, rlDest.origSReg);
+}
+
 /*
  * Target-independent code generation.  Use only high-level
  * load/store utilities here, or target-dependent genXX() handlers
@@ -327,6 +395,9 @@
   RegLocation rlDest = badLoc;
   RegLocation rlResult = badLoc;
   Instruction::Code opcode = mir->dalvikInsn.opcode;
+  uint32_t vB = mir->dalvikInsn.vB;
+  uint32_t vC = mir->dalvikInsn.vC;
+
   bool objectDefinition = false;
 
   /* Prep Src and Dest locations */
@@ -398,7 +469,7 @@
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16: {
-        llvm::Constant* immValue = cUnit->irb->GetJInt(mir->dalvikInsn.vB);
+        llvm::Constant* immValue = cUnit->irb->GetJInt(vB);
         llvm::Value* res = emitConst(cUnit, immValue, rlDest);
         defineValue(cUnit, res, rlDest.origSReg);
       }
@@ -406,14 +477,14 @@
 
     case Instruction::CONST_WIDE_16:
     case Instruction::CONST_WIDE_32: {
-        llvm::Constant* immValue = cUnit->irb->GetJLong(mir->dalvikInsn.vB);
+        llvm::Constant* immValue = cUnit->irb->GetJLong(vB);
         llvm::Value* res = emitConst(cUnit, immValue, rlDest);
         defineValue(cUnit, res, rlDest.origSReg);
       }
       break;
 
     case Instruction::CONST_HIGH16: {
-        llvm::Constant* immValue = cUnit->irb->GetJInt(mir->dalvikInsn.vB << 16);
+        llvm::Constant* immValue = cUnit->irb->GetJInt(vB << 16);
         llvm::Value* res = emitConst(cUnit, immValue, rlDest);
         defineValue(cUnit, res, rlDest.origSReg);
       }
@@ -426,7 +497,7 @@
         defineValue(cUnit, res, rlDest.origSReg);
     }
     case Instruction::CONST_WIDE_HIGH16: {
-        int64_t imm = static_cast<int64_t>(mir->dalvikInsn.vB) << 48;
+        int64_t imm = static_cast<int64_t>(vB) << 48;
         llvm::Constant* immValue = cUnit->irb->GetJLong(imm);
         llvm::Value* res = emitConst(cUnit, immValue, rlDest);
         defineValue(cUnit, res, rlDest.origSReg);
@@ -570,44 +641,44 @@
 
     case Instruction::ADD_INT_LIT16:
     case Instruction::ADD_INT_LIT8:
-      convertArithOpLit(cUnit, kOpAdd, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpAdd, rlDest, rlSrc[0], vC);
       break;
     case Instruction::RSUB_INT:
     case Instruction::RSUB_INT_LIT8:
-      convertArithOpLit(cUnit, kOpRsub, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpRsub, rlDest, rlSrc[0], vC);
       break;
     case Instruction::MUL_INT_LIT16:
     case Instruction::MUL_INT_LIT8:
-      convertArithOpLit(cUnit, kOpMul, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpMul, rlDest, rlSrc[0], vC);
       break;
     case Instruction::DIV_INT_LIT16:
     case Instruction::DIV_INT_LIT8:
-      convertArithOpLit(cUnit, kOpDiv, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpDiv, rlDest, rlSrc[0], vC);
       break;
     case Instruction::REM_INT_LIT16:
     case Instruction::REM_INT_LIT8:
-      convertArithOpLit(cUnit, kOpRem, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpRem, rlDest, rlSrc[0], vC);
       break;
     case Instruction::AND_INT_LIT16:
     case Instruction::AND_INT_LIT8:
-      convertArithOpLit(cUnit, kOpAnd, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpAnd, rlDest, rlSrc[0], vC);
       break;
     case Instruction::OR_INT_LIT16:
     case Instruction::OR_INT_LIT8:
-      convertArithOpLit(cUnit, kOpOr, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpOr, rlDest, rlSrc[0], vC);
       break;
     case Instruction::XOR_INT_LIT16:
     case Instruction::XOR_INT_LIT8:
-      convertArithOpLit(cUnit, kOpXor, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpXor, rlDest, rlSrc[0], vC);
       break;
     case Instruction::SHL_INT_LIT8:
-      convertArithOpLit(cUnit, kOpLsl, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpLsl, rlDest, rlSrc[0], vC);
       break;
     case Instruction::SHR_INT_LIT8:
-      convertArithOpLit(cUnit, kOpLsr, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpLsr, rlDest, rlSrc[0], vC);
       break;
     case Instruction::USHR_INT_LIT8:
-      convertArithOpLit(cUnit, kOpAsr, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+      convertArithOpLit(cUnit, kOpAsr, rlDest, rlSrc[0], vC);
       break;
 
     case Instruction::ADD_FLOAT:
@@ -645,6 +716,47 @@
       convertFPArithOp(cUnit, kOpRem, rlDest, rlSrc[0], rlSrc[1]);
       break;
 
+    case Instruction::INVOKE_STATIC:
+      convertInvoke(cUnit, bb, mir, kStatic, false /*range*/);
+      break;
+    case Instruction::INVOKE_STATIC_RANGE:
+      convertInvoke(cUnit, bb, mir, kStatic, true /*range*/);
+      break;
+
+    case Instruction::INVOKE_DIRECT:
+      convertInvoke(cUnit, bb,  mir, kDirect, false /*range*/);
+      break;
+    case Instruction::INVOKE_DIRECT_RANGE:
+      convertInvoke(cUnit, bb, mir, kDirect, true /*range*/);
+      break;
+
+    case Instruction::INVOKE_VIRTUAL:
+      convertInvoke(cUnit, bb, mir, kVirtual, false /*range*/);
+      break;
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+      convertInvoke(cUnit, bb, mir, kVirtual, true /*range*/);
+      break;
+
+    case Instruction::INVOKE_SUPER:
+      convertInvoke(cUnit, bb, mir, kSuper, false /*range*/);
+      break;
+    case Instruction::INVOKE_SUPER_RANGE:
+      convertInvoke(cUnit, bb, mir, kSuper, true /*range*/);
+      break;
+
+    case Instruction::INVOKE_INTERFACE:
+      convertInvoke(cUnit, bb, mir, kInterface, false /*range*/);
+      break;
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      convertInvoke(cUnit, bb, mir, kInterface, true /*range*/);
+      break;
+
+    case Instruction::CONST_STRING:
+    case Instruction::CONST_STRING_JUMBO:
+      convertConstString(cUnit, bb, vB, rlDest);
+      break;
+
+
 #if 0
 
     case Instruction::MOVE_EXCEPTION: {
@@ -715,11 +827,6 @@
       storeValue(cUnit, rlDest, rlResult);
       break;
 
-    case Instruction::CONST_STRING:
-    case Instruction::CONST_STRING_JUMBO:
-      genConstString(cUnit, mir, rlDest, rlSrc[0]);
-      break;
-
     case Instruction::CONST_CLASS:
       genConstClass(cUnit, mir, rlDest, rlSrc[0]);
       break;
@@ -884,41 +991,6 @@
       genSput(cUnit, mir, rlSrc[0], true, false);
       break;
 
-    case Instruction::INVOKE_STATIC_RANGE:
-      genInvoke(cUnit, bb, mir, kStatic, true /*range*/);
-      break;
-    case Instruction::INVOKE_STATIC:
-      genInvoke(cUnit, bb, mir, kStatic, false /*range*/);
-      break;
-
-    case Instruction::INVOKE_DIRECT:
-      genInvoke(cUnit, bb,  mir, kDirect, false /*range*/);
-      break;
-    case Instruction::INVOKE_DIRECT_RANGE:
-      genInvoke(cUnit, bb, mir, kDirect, true /*range*/);
-      break;
-
-    case Instruction::INVOKE_VIRTUAL:
-      genInvoke(cUnit, bb, mir, kVirtual, false /*range*/);
-      break;
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-      genInvoke(cUnit, bb, mir, kVirtual, true /*range*/);
-      break;
-
-    case Instruction::INVOKE_SUPER:
-      genInvoke(cUnit, bb, mir, kSuper, false /*range*/);
-      break;
-    case Instruction::INVOKE_SUPER_RANGE:
-      genInvoke(cUnit, bb, mir, kSuper, true /*range*/);
-      break;
-
-    case Instruction::INVOKE_INTERFACE:
-      genInvoke(cUnit, bb, mir, kInterface, false /*range*/);
-      break;
-    case Instruction::INVOKE_INTERFACE_RANGE:
-      genInvoke(cUnit, bb, mir, kInterface, true /*range*/);
-      break;
-
     case Instruction::NEG_INT:
     case Instruction::NOT_INT:
       res = genArithOpInt(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]);
@@ -1114,6 +1186,15 @@
      * path, it will need to be regenereated.
      */
     return false;
+  } else if (bb->blockType == kExceptionHandling) {
+    /*
+     * Because we're deferring null checking, delete the associated empty
+     * exception block.
+     * TODO: add new block type for exception blocks that we generate
+     * greenland code for.
+     */
+    llvmBB->eraseFromParent();
+    return false;
   }
 
   for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) {
@@ -1144,7 +1225,7 @@
     }
   }
 
-  if ((bb->taken == NULL) && (bb->fallThrough != NULL) && !bb->hasReturn) {
+  if ((bb->fallThrough != NULL) && !bb->hasReturn) {
     cUnit->irb->CreateBr(getLLVMBlock(cUnit, bb->fallThrough->id));
   }
 
@@ -1291,21 +1372,19 @@
 
   llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction);
 
-    // Write bitcode to file
-    std::string errmsg;
+  // Write bitcode to file
+  std::string errmsg;
 
-    llvm::OwningPtr<llvm::tool_output_file> out_file(
+  llvm::OwningPtr<llvm::tool_output_file> out_file(
       new llvm::tool_output_file("/tmp/foo.bc", errmsg,
                                  llvm::raw_fd_ostream::F_Binary));
 
-    if (!errmsg.empty()) {
-      LOG(ERROR) << "Failed to create bitcode output file: " << errmsg;
-    }
+  if (!errmsg.empty()) {
+    LOG(ERROR) << "Failed to create bitcode output file: " << errmsg;
+  }
 
-    llvm::WriteBitcodeToFile(cUnit->module, out_file->os());
-    out_file->keep();
-
-
+  llvm::WriteBitcodeToFile(cUnit->module, out_file->os());
+  out_file->keep();
 }
 
 RegLocation getLoc(CompilationUnit* cUnit, llvm::Value* val) {
@@ -1438,6 +1517,7 @@
   ConditionCode res = kCondAl;
   switch(llvmCond) {
     case llvm::ICmpInst::ICMP_NE: res = kCondNe; break;
+    case llvm::ICmpInst::ICMP_EQ: res = kCondEq; break;
     case llvm::ICmpInst::ICMP_SGT: res = kCondGt; break;
     default: LOG(FATAL) << "Unexpected llvm condition";
   }
@@ -1531,6 +1611,65 @@
   }
 }
 
+// Lower a ConstString intrinsic call; operand 0 is the constant string index.
+void cvtConstString(CompilationUnit* cUnit, llvm::CallInst* callInst)
+{
+  DCHECK(callInst->getNumArgOperands() == 1);
+  uint32_t stringIdx =
+      llvm::cast<llvm::ConstantInt>(callInst->getArgOperand(0))->getZExtValue();
+  RegLocation rlDest = getLoc(cUnit, callInst);
+  genConstString(cUnit, stringIdx, rlDest);
+}
+
+// Rebuild a CallInfo from an HLInvokeXXX intrinsic call and emit the invoke.
+// Operands 0..2: invokeType, method_idx, optFlags; operands 3..N: arguments.
+void cvtInvoke(CompilationUnit* cUnit, llvm::CallInst* callInst,
+               greenland::JType jtype)
+{
+  CallInfo* info = (CallInfo*)oatNew(cUnit, sizeof(CallInfo), true,
+                                         kAllocMisc);
+  if (jtype == greenland::kVoid) {
+    info->result.location = kLocInvalid;
+  } else {
+    info->result = getLoc(cUnit, callInst);
+  }
+  llvm::ConstantInt* invokeTypeVal =
+      llvm::cast<llvm::ConstantInt>(callInst->getArgOperand(0));
+  llvm::ConstantInt* methodIndexVal =
+      llvm::cast<llvm::ConstantInt>(callInst->getArgOperand(1));
+  llvm::ConstantInt* optFlagsVal =
+      llvm::cast<llvm::ConstantInt>(callInst->getArgOperand(2));
+  info->type = static_cast<InvokeType>(invokeTypeVal->getZExtValue());
+  info->index = methodIndexVal->getZExtValue();
+  info->optFlags = optFlagsVal->getZExtValue();
+  info->offset = cUnit->currentDalvikOffset;
+  // FIXME - rework such that we no longer need isRange
+  info->isRange = false;
+  // Count the argument words, and then build argument array.
+  info->numArgWords = 0;
+  for (unsigned int i = 3; i < callInst->getNumArgOperands(); i++) {
+    RegLocation tLoc = getLoc(cUnit, callInst->getArgOperand(i));
+    info->numArgWords += tLoc.wide ? 2 : 1;
+  }
+  info->args = (info->numArgWords == 0) ? NULL : (RegLocation*)
+      oatNew(cUnit, sizeof(RegLocation) * info->numArgWords, false, kAllocMisc);
+  // Now, fill in the location records, synthesizing high loc of wide vals
+  for (int i = 3, next = 0; next < info->numArgWords;) {
+    info->args[next] = getLoc(cUnit, callInst->getArgOperand(i++));
+    if (cUnit->printMe) {
+      oatDumpRegLoc(info->args[next]);
+    }
+    if (info->args[next].wide) {
+      next++;
+      // TODO: Might make sense to mark this as an invalid loc
+      info->args[next].origSReg = info->args[next-1].origSReg+1;
+      info->args[next].sRegLow = info->args[next-1].sRegLow+1;
+    }
+    next++;
+  }
+  genInvoke(cUnit, info);
+}
+
 bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb)
 {
   bool isEntry = (bb == &cUnit->func->getEntryBlock());
@@ -1554,8 +1693,7 @@
   //TODO: restore oat incoming liveness optimization
   oatClobberAllRegs(cUnit);
 
-  //LIR* headLIR = NULL;
-
+  LIR* headLIR = NULL;
 
   if (isEntry) {
     cUnit->currentDalvikOffset = 0;
@@ -1575,6 +1713,32 @@
       cUnit->currentDalvikOffset = dexOffsetValue->getZExtValue();
     }
 
+    oatResetRegPool(cUnit);
+    if (cUnit->disableOpt & (1 << kTrackLiveTemps)) {
+      oatClobberAllRegs(cUnit);
+    }
+
+    if (cUnit->disableOpt & (1 << kSuppressLoads)) {
+      oatResetDefTracking(cUnit);
+    }
+
+#ifndef NDEBUG
+    /* Reset temp tracking sanity check */
+    cUnit->liveSReg = INVALID_SREG;
+#endif
+
+    LIR* boundaryLIR;
+    const char* instStr = "boundary";
+    boundaryLIR = newLIR1(cUnit, kPseudoDalvikByteCodeBoundary,
+                          (intptr_t) instStr);
+    cUnit->boundaryMap.Overwrite(cUnit->currentDalvikOffset, boundaryLIR);
+
+    /* Remember the first LIR for this block */
+    if (headLIR == NULL) {
+      headLIR = boundaryLIR;
+      headLIR->defMask = ENCODE_ALL;
+    }
+
     switch(opcode) {
 
       case llvm::Instruction::ICmp: {
@@ -1597,6 +1761,7 @@
           switch (id) {
             case greenland::IntrinsicHelper::AllocaShadowFrame:
             case greenland::IntrinsicHelper::SetShadowFrameEntry:
+            case greenland::IntrinsicHelper::PopShadowFrame:
               // Ignore shadow frame stuff for quick compiler
               break;
             case greenland::IntrinsicHelper::CopyInt:
@@ -1619,6 +1784,15 @@
             case greenland::IntrinsicHelper::CheckSuspend:
               genSuspendTest(cUnit, 0 /* optFlags already applied */);
               break;
+            case greenland::IntrinsicHelper::HLInvokeInt:
+              cvtInvoke(cUnit, callInst, greenland::kInt);
+              break;
+            case greenland::IntrinsicHelper::HLInvokeVoid:
+              cvtInvoke(cUnit, callInst, greenland::kVoid);
+              break;
+            case greenland::IntrinsicHelper::ConstString:
+              cvtConstString(cUnit, callInst);
+              break;
             case greenland::IntrinsicHelper::UnknownId:
               cvtCall(cUnit, callInst, callee);
               break;
@@ -1694,6 +1868,10 @@
         LOG(FATAL) << "Unknown llvm opcode: " << opcode; break;
     }
   }
+
+  if (headLIR != NULL) {
+    oatApplyLocalOptimizations(cUnit, headLIR, cUnit->lastLIRInsn);
+  }
   return false;
 }
 
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 7f98f07..aab323b 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -174,8 +174,8 @@
  * high-word loc for wide arguments.  Also pull up any following
  * MOVE_RESULT and incorporate it into the invoke.
  */
-CallInfo* newCallInfo(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
-                      InvokeType type, bool isRange)
+CallInfo* oatNewCallInfo(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                         InvokeType type, bool isRange)
 {
   CallInfo* info = (CallInfo*)oatNew(cUnit, sizeof(CallInfo), true,
                                          kAllocMisc);
@@ -407,11 +407,11 @@
 
     case Instruction::CONST_STRING:
     case Instruction::CONST_STRING_JUMBO:
-      genConstString(cUnit, vB, rlDest, rlSrc[0]);
+      genConstString(cUnit, vB, rlDest);
       break;
 
     case Instruction::CONST_CLASS:
-      genConstClass(cUnit, vB, rlDest, rlSrc[0]);
+      genConstClass(cUnit, vB, rlDest);
       break;
 
     case Instruction::FILL_ARRAY_DATA:
@@ -419,12 +419,12 @@
       break;
 
     case Instruction::FILLED_NEW_ARRAY:
-      genFilledNewArray(cUnit, newCallInfo(cUnit, bb, mir, kStatic,
+      genFilledNewArray(cUnit, oatNewCallInfo(cUnit, bb, mir, kStatic,
                         false /* not range */));
       break;
 
     case Instruction::FILLED_NEW_ARRAY_RANGE:
-      genFilledNewArray(cUnit, newCallInfo(cUnit, bb, mir, kStatic,
+      genFilledNewArray(cUnit, oatNewCallInfo(cUnit, bb, mir, kStatic,
                         true /* range */));
       break;
 
@@ -622,38 +622,38 @@
       break;
 
     case Instruction::INVOKE_STATIC_RANGE:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kStatic, true));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kStatic, true));
       break;
     case Instruction::INVOKE_STATIC:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kStatic, false));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kStatic, false));
       break;
 
     case Instruction::INVOKE_DIRECT:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kDirect, false));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kDirect, false));
       break;
     case Instruction::INVOKE_DIRECT_RANGE:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kDirect, true));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kDirect, true));
       break;
 
     case Instruction::INVOKE_VIRTUAL:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kVirtual, false));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kVirtual, false));
       break;
     case Instruction::INVOKE_VIRTUAL_RANGE:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kVirtual, true));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kVirtual, true));
       break;
 
     case Instruction::INVOKE_SUPER:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kSuper, false));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kSuper, false));
       break;
     case Instruction::INVOKE_SUPER_RANGE:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kSuper, true));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kSuper, true));
       break;
 
     case Instruction::INVOKE_INTERFACE:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kInterface, false));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kInterface, false));
       break;
     case Instruction::INVOKE_INTERFACE_RANGE:
-      genInvoke(cUnit, newCallInfo(cUnit, bb, mir, kInterface, true));
+      genInvoke(cUnit, oatNewCallInfo(cUnit, bb, mir, kInterface, true));
       break;
 
     case Instruction::NEG_INT:
diff --git a/src/greenland/intrinsic_func_list.def b/src/greenland/intrinsic_func_list.def
index 87ce43b..d2e0a17 100644
--- a/src/greenland/intrinsic_func_list.def
+++ b/src/greenland/intrinsic_func_list.def
@@ -449,6 +449,61 @@
                           _EXPAND_ARG3(kInt32ConstantTy, kJavaMethodTy, kJavaThreadTy))
 
 //----------------------------------------------------------------------------
+// High-level Invokes (fast-path determination not yet performed)
+//
+// NOTE: We expect these intrinsics to be temporary.  Once calling conventions are
+//       fully merged, the unified front end will lower down to the
+//       InvokeRetxxx() intrinsics in the next section and these will be
+//       removed.
+//
+// arg0: InvokeType
+// arg1: method_idx
+// arg2: optimization_flags (primarily to note whether null checking is needed)
+// [arg3..argN]: actual arguments
+//----------------------------------------------------------------------------
+// INVOKE method returns void
+_EVAL_DEF_INTRINSICS_FUNC(HLInvokeVoid,
+                          dex_lang_hl_invoke.void,
+                          kAttrNone,
+                          kVoidTy,
+                          _EXPAND_ARG1(kVarArgTy))
+
+// INVOKE method returns object
+_EVAL_DEF_INTRINSICS_FUNC(HLInvokeObj,
+                          dex_lang_hl_invoke.obj,
+                          kAttrNone,
+                          kJavaObjectTy,
+                          _EXPAND_ARG1(kVarArgTy))
+
+// INVOKE method returns int
+_EVAL_DEF_INTRINSICS_FUNC(HLInvokeInt,
+                          dex_lang_hl_invoke.i32,
+                          kAttrNone,
+                          kInt32Ty,
+                          _EXPAND_ARG1(kVarArgTy))
+
+// INVOKE method returns float
+_EVAL_DEF_INTRINSICS_FUNC(HLInvokeFloat,
+                          dex_lang_hl_invoke.f32,
+                          kAttrNone,
+                          kFloatTy,
+                          _EXPAND_ARG1(kVarArgTy))
+
+// INVOKE method returns long
+_EVAL_DEF_INTRINSICS_FUNC(HLInvokeLong,
+                          dex_lang_hl_invoke.i64,
+                          kAttrNone,
+                          kInt64Ty,
+                          _EXPAND_ARG1(kVarArgTy))
+
+// INVOKE method returns double
+_EVAL_DEF_INTRINSICS_FUNC(HLInvokeDouble,
+                          dex_lang_hl_invoke.f64,
+                          kAttrNone,
+                          kDoubleTy,
+                          _EXPAND_ARG1(kVarArgTy))
+
+//----------------------------------------------------------------------------
 // Invoke
 //----------------------------------------------------------------------------
 
diff --git a/test/Fibonacci/Fibonacci.java b/test/Fibonacci/Fibonacci.java
index 9c94367..a5c4e4b 100644
--- a/test/Fibonacci/Fibonacci.java
+++ b/test/Fibonacci/Fibonacci.java
@@ -16,6 +16,8 @@
 
 class Fibonacci {
 
+/*
+    // Iterative version
     static int fibonacci(int n) {
         if (n == 0) {
             return 0;
@@ -29,12 +31,22 @@
         }
         return y;
     }
+*/
+
+   // Recursive version: returns n for n == 0 or n == 1, else fib(n-1) + fib(n-2)
+   static int fibonacci(int n) {
+        if ((n == 0) || (n == 1)) {
+            return n;
+        } else {
+            return fibonacci(n - 1) + (fibonacci(n - 2));
+        }
+    }
 
     public static void main(String[] args) {
         String arg = (args.length > 0) ? args[0] : "10";
         try {
             int x = Integer.parseInt(arg);
-            int y = fibonacci(x); /* to warm up cache */
+            int y = fibonacci(x);
             System.out.printf("fibonacci(%d)=%d\n", x, y);
             y = fibonacci(x + 1);
             System.out.printf("fibonacci(%d)=%d\n", x + 1, y);