Merge "Improve ARM assembler assertions." into ics-mr1-plus-art
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index c9ba285..2209084 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -582,7 +582,8 @@
 #if defined(TARGET_ARM)
     int rVal = rLR;  // Using a lot of temps, rLR is known free here
 #elif defined(TARGET_X86)
-    int rVal = rSrc;
+    oatFreeTemp(cUnit, rRET0);
+    int rVal = oatAllocTemp(cUnit);
 #else
     int rVal = oatAllocTemp(cUnit);
 #endif
@@ -609,6 +610,11 @@
     opRegImm(cUnit, kOpSub, rIdx, 1);
     opCmpImmBranch(cUnit, kCondGe, rIdx, 0, target);
 #endif
+#if defined(TARGET_X86)
+    // Restore the target pointer
+    opRegRegImm(cUnit, kOpAdd, rRET0, rDst,
+                -Array::DataOffset(component_size).Int32Value());
+#endif
   } else if (!isRange) {
     // TUNING: interleave
     for (unsigned int i = 0; i < dInsn->vA; i++) {
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index b9605cc..671e728 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -262,8 +262,8 @@
   EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58),
   EXT_0F_ENCODING_MAP(Mulsd,     0xF2, 0x59),
   EXT_0F_ENCODING_MAP(Mulss,     0xF3, 0x59),
-  EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF2, 0x5A),
-  EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF3, 0x5A),
+  EXT_0F_ENCODING_MAP(Cvtsd2ss,  0xF2, 0x5A),
+  EXT_0F_ENCODING_MAP(Cvtss2sd,  0xF3, 0x5A),
   EXT_0F_ENCODING_MAP(Subsd,     0xF2, 0x5C),
   EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C),
   EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E),
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 24cd7d3..e6b47d2 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -44,8 +44,12 @@
       op = kX86MulssRR;
       break;
     case Instruction::NEG_FLOAT:
-      UNIMPLEMENTED(WARNING) << "inline fneg"; // pxor xmm, [0x80000000]
-                                                             // fall-through
+      rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+      rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+      newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg);
+      newLIR2(cUnit, kX86SubssRR, rlResult.lowReg, rlSrc1.lowReg);
+      storeValue(cUnit, rlDest, rlResult);
+      return false;
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT: {
       return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -91,6 +95,12 @@
       op = kX86MulsdRR;
       break;
     case Instruction::NEG_DOUBLE:
+      rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+      rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+      newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg);
+      newLIR2(cUnit, kX86SubsdRR, rlResult.lowReg, rlSrc1.lowReg);
+      storeValueWide(cUnit, rlDest, rlResult);
+      return false;
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE: {
       return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -124,9 +134,7 @@
   RegLocation rlDest;
   X86OpCode op = kX86Nop;
   int srcReg;
-  int tempReg;
   RegLocation rlResult;
-  LIR* branch = NULL;
   switch (opcode) {
     case Instruction::INT_TO_FLOAT:
       longSrc = false;
@@ -152,40 +160,52 @@
       rcSrc = kCoreReg;
       op = kX86Cvtsi2sdRR;
       break;
-    case Instruction::FLOAT_TO_INT:
+    case Instruction::FLOAT_TO_INT: {
       rlSrc = oatGetSrc(cUnit, mir, 0);
       rlSrc = loadValue(cUnit, rlSrc, kFPReg);
       srcReg = rlSrc.lowReg;
       rlDest = oatGetDest(cUnit, mir, 0);
       oatClobberSReg(cUnit, rlDest.sRegLow);
       rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-      tempReg = oatAllocTempFloat(cUnit);
+      int tempReg = oatAllocTempFloat(cUnit);
 
       loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
       newLIR2(cUnit, kX86Cvtsi2ssRR, tempReg, rlResult.lowReg);
       newLIR2(cUnit, kX86ComissRR, srcReg, tempReg);
-      branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
-      newLIR2(cUnit, kX86Cvtss2siRR, rlResult.lowReg, srcReg);
-      branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+      LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
+      LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);
+      newLIR2(cUnit, kX86Cvttss2siRR, rlResult.lowReg, srcReg);
+      LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+      branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+      newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);
+      branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+      branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
       storeValue(cUnit, rlDest, rlResult);
       return false;
-    case Instruction::DOUBLE_TO_INT:
+    }
+    case Instruction::DOUBLE_TO_INT: {
       rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
       rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
       srcReg = rlSrc.lowReg;
       rlDest = oatGetDest(cUnit, mir, 0);
       oatClobberSReg(cUnit, rlDest.sRegLow);
       rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-      tempReg = oatAllocTempDouble(cUnit);
+      int tempReg = oatAllocTempDouble(cUnit);
 
       loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
       newLIR2(cUnit, kX86Cvtsi2sdRR, tempReg, rlResult.lowReg);
       newLIR2(cUnit, kX86ComisdRR, srcReg, tempReg);
-      branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
-      newLIR2(cUnit, kX86Cvtsd2siRR, rlResult.lowReg, srcReg);
-      branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+      LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
+      LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);
+      newLIR2(cUnit, kX86Cvttsd2siRR, rlResult.lowReg, srcReg);
+      LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+      branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+      newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);
+      branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+      branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
       storeValue(cUnit, rlDest, rlResult);
       return false;
+    }
     case Instruction::LONG_TO_DOUBLE:
     case Instruction::LONG_TO_FLOAT:
       // These can be implemented inline by using memory as a 64-bit source.
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index 3698d2d..6f11709 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -360,64 +360,6 @@
     return res;
 }
 
-/* Load value from base + scaled index. */
-LIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
-                               int rIndex, int rDest, int scale, OpSize size)
-{
-  UNIMPLEMENTED(WARNING) << "loadBaseIndexed";
-  newLIR0(cUnit, kX86Bkpt);
-  return NULL;
-#if 0
-  LIR *first = NULL;
-  LIR *res;
-  X86OpCode opcode = kX86Nop;
-  int tReg = oatAllocTemp(cUnit);
-
-  if (FPREG(rDest)) {
-    DCHECK(SINGLEREG(rDest));
-    DCHECK((size == kWord) || (size == kSingle));
-    size = kSingle;
-  } else {
-    if (size == kSingle)
-      size = kWord;
-  }
-
-  if (!scale) {
-    first = newLIR3(cUnit, kX86Addu, tReg , rBase, rIndex);
-  } else {
-    first = opRegRegImm(cUnit, kOpLsl, tReg, rIndex, scale);
-    newLIR3(cUnit, kX86Addu, tReg , rBase, tReg);
-  }
-
-  switch (size) {
-    case kSingle:
-      opcode = kX86Flwc1;
-      break;
-    case kWord:
-      opcode = kX86Lw;
-      break;
-    case kUnsignedHalf:
-      opcode = kX86Lhu;
-      break;
-    case kSignedHalf:
-      opcode = kX86Lh;
-      break;
-    case kUnsignedByte:
-      opcode = kX86Lbu;
-      break;
-    case kSignedByte:
-      opcode = kX86Lb;
-      break;
-    default:
-      LOG(FATAL) << "Bad case in loadBaseIndexed";
-  }
-
-  res = newLIR3(cUnit, opcode, rDest, 0, tReg);
-  oatFreeTemp(cUnit, tReg);
-  return (first) ? first : res;
-#endif
-}
-
 LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask)
 {
   UNIMPLEMENTED(WARNING) << "loadMultiple";
@@ -560,6 +502,13 @@
   return load;
 }
 
+/* Load value from base + scaled index. */
+LIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
+                     int rIndex, int rDest, int scale, OpSize size) {
+  return loadBaseIndexedDisp(cUnit, NULL, rBase, rIndex, scale, 0,
+                             rDest, INVALID_REG, size, INVALID_SREG);
+}
+
 LIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir,
                   int rBase, int displacement,
                   int rDest,
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 36e459c..3ec1112 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -427,8 +427,8 @@
   Binary0fOpCode(kX86Addss),    // float add
   Binary0fOpCode(kX86Mulsd),    // double multiply
   Binary0fOpCode(kX86Mulss),    // float multiply
-  Binary0fOpCode(kX86Cvtss2sd), // float to double
   Binary0fOpCode(kX86Cvtsd2ss), // double to float
+  Binary0fOpCode(kX86Cvtss2sd), // float to double
   Binary0fOpCode(kX86Subsd),    // double subtract
   Binary0fOpCode(kX86Subss),    // float subtract
   Binary0fOpCode(kX86Divsd),    // double divide
diff --git a/src/compiler_llvm/compilation_unit.cc b/src/compiler_llvm/compilation_unit.cc
index 598a8b3..277eae6 100644
--- a/src/compiler_llvm/compilation_unit.cc
+++ b/src/compiler_llvm/compilation_unit.cc
@@ -206,98 +206,17 @@
 bool CompilationUnit::Materialize(size_t thread_count) {
   MutexLock GUARD(cunit_lock_);
 
-  if (thread_count == 1) {
-    llvm::raw_string_ostream str_os(elf_image_);
-    bool success = MaterializeToFile(str_os);
-    LOG(INFO) << "Compilation Unit: " << elf_idx_ << (success ? " (done)" : " (failed)");
+  // Materialize the bitcode to elf_image_
+  llvm::raw_string_ostream str_os(elf_image_);
+  bool success = MaterializeToFile(str_os);
+  LOG(INFO) << "Compilation Unit: " << elf_idx_ << (success ? " (done)" : " (failed)");
 
-    // Free the resources
-    context_.reset(NULL);
-    irb_.reset(NULL);
-    module_ = NULL;
+  // Free the resources
+  context_.reset(NULL);
+  irb_.reset(NULL);
+  module_ = NULL;
 
-    return success;
-  }
-
-  // Prepare the pipe between parent process and child process
-  int pipe_fd[2];
-  if (pipe(pipe_fd) == -1) {
-    PLOG(FATAL) << "Failed to create pipe for CompilerWorker";
-    return false;
-  }
-
-  // Fork a process to do the compilation
-  pid_t pid = fork();
-  if (pid < 0) {
-    close(pipe_fd[0]);
-    close(pipe_fd[1]);
-    PLOG(FATAL) << "Failed to fork a process to do the compilation";
-    return false;
-
-  } else if (pid == 0) { // Child process
-    // Close the unused pipe read end
-    close(pipe_fd[0]);
-
-    // Change process groups, so we don't get ripped by ProcessManager
-    setpgid(0, 0);
-
-    llvm::raw_fd_ostream fd_os(pipe_fd[1], /* shouldClose */true);
-
-    // TODO: Should use exec* family instead of invoking a function.
-    // Forward our compilation request to bcc.
-    exit(static_cast<int>(!MaterializeToFile(fd_os)));
-
-  } else { // Parent process
-    // Close the unused pipe write end
-    close(pipe_fd[1]);
-
-    // Free the resources
-    context_.reset(NULL);
-    irb_.reset(NULL);
-    module_ = NULL;
-
-    // Read the result out from the pipe read end (until failure)
-    const size_t buf_size = 1024;
-    std::vector<uint8_t> buf(buf_size);
-    while (true) {
-      // Read from the pipe
-      ssize_t nread = read(pipe_fd[0], &*buf.begin(), buf_size);
-      if (nread < 0) {
-        if (errno == EAGAIN || errno == EINTR) {
-          continue;
-        } else {
-          LOG(ERROR) << "Unexpected error during IPC: " << strerror(errno);
-        }
-      }
-
-      // Append to the end of the elf_image_
-      elf_image_.append(buf.begin(), buf.begin() + nread);
-
-      if (nread < static_cast<ssize_t>(buf_size)) { // EOF reached!
-        break;
-      }
-    }
-
-    close(pipe_fd[0]);
-
-    // Wait for child to finish
-    int status;
-    pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
-    if (got_pid != pid) {
-      PLOG(ERROR) << "waitpid failed: wanted " << pid << ", got " << got_pid;
-      elf_image_.clear();
-      return false;
-    }
-
-    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-      LOG(ERROR) << "Failed to compile the bitcode: " << WEXITSTATUS(status);
-      elf_image_.clear();
-      return false;
-    }
-
-    LOG(INFO) << "Compilation Unit: " << elf_idx_ << " (done)";
-    return true;
-  }
+  return success;
 }
 
 
@@ -369,7 +288,7 @@
   target_options.NoFramePointerElim = true;
   target_options.NoFramePointerElimNonLeaf = true;
   target_options.UseSoftFloat = false;
-  target_options.EnableFastISel = true;
+  target_options.EnableFastISel = false;
 
   // Create the llvm::TargetMachine
   llvm::OwningPtr<llvm::TargetMachine> target_machine(
diff --git a/src/compiler_llvm/ir_builder.h b/src/compiler_llvm/ir_builder.h
index 24a594d..e1a47c4 100644
--- a/src/compiler_llvm/ir_builder.h
+++ b/src/compiler_llvm/ir_builder.h
@@ -105,6 +105,21 @@
     StoreToObjectOffset(object_addr, offset, new_value, tbaa_.GetSpecialType(special_ty));
   }
 
+  llvm::LoadInst* LoadFromObjectOffset(llvm::Value* object_addr,
+                                       int64_t offset,
+                                       llvm::Type* type,
+                                       TBAASpecialType special_ty, JType j_ty) {
+    return LoadFromObjectOffset(object_addr, offset, type, tbaa_.GetMemoryJType(special_ty, j_ty));
+  }
+
+  void StoreToObjectOffset(llvm::Value* object_addr,
+                           int64_t offset,
+                           llvm::Value* new_value,
+                           TBAASpecialType special_ty, JType j_ty) {
+    DCHECK_NE(special_ty, kTBAAConstJObject) << "ConstJObject is read only!";
+    StoreToObjectOffset(object_addr, offset, new_value, tbaa_.GetMemoryJType(special_ty, j_ty));
+  }
+
   void SetTBAACall(llvm::CallInst* call_inst, TBAASpecialType special_ty) {
     call_inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_.GetSpecialType(special_ty));
   }
diff --git a/src/compiler_llvm/method_compiler.cc b/src/compiler_llvm/method_compiler.cc
index 4070791..c29e458 100644
--- a/src/compiler_llvm/method_compiler.cc
+++ b/src/compiler_llvm/method_compiler.cc
@@ -66,7 +66,7 @@
     reg_to_shadow_frame_index_(code_item_->registers_size_, -1),
     retval_reg_(NULL),
     basic_block_stack_overflow_(NULL),
-    basic_block_reg_alloca_(NULL), basic_block_shadow_frame_alloca_(NULL),
+    basic_block_alloca_(NULL), basic_block_shadow_frame_(NULL),
     basic_block_reg_arg_init_(NULL),
     basic_blocks_(code_item_->insns_size_in_code_units_),
     basic_block_landing_pads_(code_item_->tries_size_, NULL),
@@ -150,13 +150,13 @@
   llvm::BasicBlock* entry =
     llvm::BasicBlock::Create(*context_, PrettyMethod(method_idx_, *dex_file_), func_);
 #endif
-  basic_block_reg_alloca_ =
+  basic_block_alloca_ =
     llvm::BasicBlock::Create(*context_, "prologue.alloca", func_);
 
   basic_block_stack_overflow_ =
     llvm::BasicBlock::Create(*context_, "prologue.stack_overflow_check", func_);
 
-  basic_block_shadow_frame_alloca_ =
+  basic_block_shadow_frame_ =
     llvm::BasicBlock::Create(*context_, "prologue.shadowframe", func_);
 
   basic_block_reg_arg_init_ =
@@ -164,9 +164,12 @@
 
 #if !defined(NDEBUG)
   irb_.SetInsertPoint(entry);
-  irb_.CreateBr(basic_block_reg_alloca_);
+  irb_.CreateBr(basic_block_alloca_);
 #endif
 
+  irb_.SetInsertPoint(basic_block_alloca_);
+  jvalue_temp_ = irb_.CreateAlloca(irb_.getJValueTy());
+
   // Create register array
   for (uint16_t r = 0; r < code_item_->registers_size_; ++r) {
     regs_[r] = DalvikReg::CreateLocalVarReg(*this, r);
@@ -239,7 +242,7 @@
 
 
 void MethodCompiler::EmitPrologueLastBranch() {
-  irb_.SetInsertPoint(basic_block_reg_alloca_);
+  irb_.SetInsertPoint(basic_block_alloca_);
   irb_.CreateBr(basic_block_stack_overflow_);
 
   // If a method will not call to other method, and the method is small, we can avoid stack overflow
@@ -251,32 +254,39 @@
   }
 
   irb_.SetInsertPoint(basic_block_stack_overflow_);
-  irb_.CreateBr(basic_block_shadow_frame_alloca_);
+  irb_.CreateBr(basic_block_shadow_frame_);
 
-  irb_.SetInsertPoint(basic_block_shadow_frame_alloca_);
+  irb_.SetInsertPoint(basic_block_shadow_frame_);
   irb_.CreateBr(basic_block_reg_arg_init_);
 }
 
 
 void MethodCompiler::EmitPrologueAllocShadowFrame() {
-  irb_.SetInsertPoint(basic_block_shadow_frame_alloca_);
+  irb_.SetInsertPoint(basic_block_alloca_);
 
   // Allocate the shadow frame now!
   uint32_t sirt_size = 0;
-  for (uint32_t i = 0, num_of_regs = code_item_->registers_size_; i < num_of_regs; ++i) {
-    if (IsRegCanBeObject(i)) {
-      reg_to_shadow_frame_index_[i] = sirt_size++;
+  if (method_info_.need_shadow_frame_entry) {
+    for (uint32_t i = 0, num_of_regs = code_item_->registers_size_; i < num_of_regs; ++i) {
+      if (IsRegCanBeObject(i)) {
+        reg_to_shadow_frame_index_[i] = sirt_size++;
+      }
     }
   }
 
   llvm::StructType* shadow_frame_type = irb_.getShadowFrameTy(sirt_size);
   shadow_frame_ = irb_.CreateAlloca(shadow_frame_type);
 
-  // Zero-initialization of the shadow frame
-  llvm::ConstantAggregateZero* zero_initializer =
-    llvm::ConstantAggregateZero::get(shadow_frame_type);
+  irb_.SetInsertPoint(basic_block_shadow_frame_);
 
-  irb_.CreateStore(zero_initializer, shadow_frame_, kTBAAShadowFrame);
+  // Zero-initialization of the shadow frame table
+  llvm::Value* shadow_frame_table = irb_.CreateConstGEP2_32(shadow_frame_, 0, 1);
+  llvm::Type* table_type = shadow_frame_type->getElementType(1);
+
+  llvm::ConstantAggregateZero* zero_initializer =
+    llvm::ConstantAggregateZero::get(table_type);
+
+  irb_.CreateStore(zero_initializer, shadow_frame_table, kTBAAShadowFrame);
 
   // Get method object
   llvm::Value* method_object_addr = EmitLoadMethodObjectAddr();
@@ -1832,11 +1842,14 @@
     EmitAllocNewArray(dex_pc, dec_insn.vA, dec_insn.vB, true);
 
   if (dec_insn.vA > 0) {
-    // Resolve the element type
-    Class* klass = dex_cache_->GetResolvedType(dec_insn.vB)->GetComponentType();
-    // TODO: Avoid the usage of the dex_cache_.  Try to figure out a better
-    // way to distinguish [I and [L.
-    CHECK_NE(klass, static_cast<Class*>(NULL));
+    // Check for the element type
+    uint32_t type_desc_len = 0;
+    const char* type_desc =
+      dex_file_->StringByTypeIdx(dec_insn.vB, &type_desc_len);
+
+    DCHECK_GE(type_desc_len, 2u); // should be guaranteed by verifier
+    DCHECK_EQ(type_desc[0], '['); // should be guaranteed by verifier
+    bool is_elem_int_ty = (type_desc[1] == 'I');
 
     uint32_t alignment;
     llvm::Constant* elem_size;
@@ -1845,12 +1858,11 @@
     // NOTE: Currently filled-new-array only supports 'L', '[', and 'I'
     // as the element, thus we are only checking 2 cases: primitive int and
     // non-primitive type.
-    if (klass->IsPrimitiveInt()) {
+    if (is_elem_int_ty) {
       alignment = sizeof(int32_t);
       elem_size = irb_.getPtrEquivInt(sizeof(int32_t));
       field_type = irb_.getJIntTy()->getPointerTo();
     } else {
-      CHECK(!klass->IsPrimitive());
       alignment = irb_.getSizeOfPtrEquivInt();
       elem_size = irb_.getSizeOfPtrEquivIntValue();
       field_type = irb_.getJObjectTy()->getPointerTo();
@@ -1875,7 +1887,7 @@
       }
 
       llvm::Value* reg_value;
-      if (klass->IsPrimitiveInt()) {
+      if (is_elem_int_ty) {
         reg_value = EmitLoadDalvikReg(reg_index, kInt, kAccurate);
       } else {
         reg_value = EmitLoadDalvikReg(reg_index, kObject, kAccurate);
@@ -2255,7 +2267,6 @@
 // Emit Array GetElementPtr
 llvm::Value* MethodCompiler::EmitArrayGEP(llvm::Value* array_addr,
                                           llvm::Value* index_value,
-                                          llvm::Type* elem_type,
                                           JType elem_jty) {
 
   int data_offset;
@@ -2269,6 +2280,8 @@
   llvm::Constant* data_offset_value =
     irb_.getPtrEquivInt(data_offset);
 
+  llvm::Type* elem_type = irb_.getJType(elem_jty, kArray);
+
   llvm::Value* array_data_addr =
     irb_.CreatePtrDisp(array_addr, data_offset_value,
                        elem_type->getPointerTo());
@@ -2288,10 +2301,7 @@
 
   EmitGuard_ArrayException(dex_pc, array_addr, index_value);
 
-  llvm::Type* elem_type = irb_.getJType(elem_jty, kArray);
-
-  llvm::Value* array_elem_addr =
-    EmitArrayGEP(array_addr, index_value, elem_type, elem_jty);
+  llvm::Value* array_elem_addr = EmitArrayGEP(array_addr, index_value, elem_jty);
 
   llvm::Value* array_elem_value = irb_.CreateLoad(array_elem_addr, kTBAAHeapArray, elem_jty);
 
@@ -2312,10 +2322,7 @@
 
   EmitGuard_ArrayException(dex_pc, array_addr, index_value);
 
-  llvm::Type* elem_type = irb_.getJType(elem_jty, kArray);
-
-  llvm::Value* array_elem_addr =
-    EmitArrayGEP(array_addr, index_value, elem_type, elem_jty);
+  llvm::Value* array_elem_addr = EmitArrayGEP(array_addr, index_value, elem_jty);
 
   llvm::Value* new_value = EmitLoadDalvikReg(dec_insn.vA, elem_jty, kArray);
 
@@ -2812,12 +2819,6 @@
     }
   }
 
-  llvm::Value* code_addr =
-    irb_.LoadFromObjectOffset(callee_method_object_addr,
-                              Method::GetCodeOffset().Int32Value(),
-                              GetFunctionType(callee_method_idx, is_static)->getPointerTo(),
-                              kTBAAJRuntime);
-
   // Load the actual parameter
   std::vector<llvm::Value*> args;
 
@@ -2831,6 +2832,21 @@
   EmitLoadActualParameters(args, callee_method_idx, dec_insn,
                            arg_fmt, is_static);
 
+  if (is_fast_path && (invoke_type == kDirect || invoke_type == kStatic)) {
+    bool need_retry = EmitInlineJavaIntrinsic(PrettyMethod(callee_method_idx, *dex_file_),
+                                              args,
+                                              GetNextBasicBlock(dex_pc));
+    if (!need_retry) {
+      return;
+    }
+  }
+
+  llvm::Value* code_addr =
+    irb_.LoadFromObjectOffset(callee_method_object_addr,
+                              Method::GetCodeOffset().Int32Value(),
+                              GetFunctionType(callee_method_idx, is_static)->getPointerTo(),
+                              kTBAAJRuntime);
+
 #if 0
   // Invoke callee
   EmitUpdateDexPC(dex_pc);
@@ -2902,17 +2918,15 @@
       irb_.SetInsertPoint(block_proxy_stub);
     }
     { // proxy stub
-      llvm::Value* temp_space_addr;
       if (ret_shorty != 'V') {
-        temp_space_addr = irb_.CreateAlloca(irb_.getJValueTy());
-        args.push_back(temp_space_addr);
+        args.push_back(jvalue_temp_);
       }
       // TODO: Remove this after we solve the proxy trampoline calling convention problem.
       irb_.CreateCall(irb_.GetRuntime(ProxyInvokeHandler), args);
       if (ret_shorty != 'V') {
         llvm::Type* accurate_ret_type = irb_.getJType(ret_shorty, kAccurate);
         llvm::Value* result_addr =
-            irb_.CreateBitCast(temp_space_addr, accurate_ret_type->getPointerTo());
+            irb_.CreateBitCast(jvalue_temp_, accurate_ret_type->getPointerTo());
         llvm::Value* retval = irb_.CreateLoad(result_addr, kTBAAStackTemp);
         EmitStoreDalvikRetValReg(ret_shorty, kAccurate, retval);
       }
@@ -2961,7 +2975,7 @@
     irb_.getPtrEquivInt(static_cast<uint64_t>(vtable_idx));
 
   llvm::Value* method_field_addr =
-    EmitArrayGEP(vtable_addr, vtable_idx_value, irb_.getJObjectTy(), kObject);
+    EmitArrayGEP(vtable_addr, vtable_idx_value, kObject);
 
   return irb_.CreateLoad(method_field_addr, kTBAAConstJObject);
 }
@@ -3575,8 +3589,7 @@
 
   llvm::Value* type_idx_value = irb_.getPtrEquivInt(type_idx);
 
-  return EmitArrayGEP(static_storage_dex_cache_addr, type_idx_value,
-                      irb_.getJObjectTy(), kObject);
+  return EmitArrayGEP(static_storage_dex_cache_addr, type_idx_value, kObject);
 }
 
 
@@ -3587,8 +3600,7 @@
 
   llvm::Value* type_idx_value = irb_.getPtrEquivInt(type_idx);
 
-  return EmitArrayGEP(resolved_type_dex_cache_addr, type_idx_value,
-                      irb_.getJObjectTy(), kObject);
+  return EmitArrayGEP(resolved_type_dex_cache_addr, type_idx_value, kObject);
 }
 
 
@@ -3599,8 +3611,7 @@
 
   llvm::Value* method_idx_value = irb_.getPtrEquivInt(method_idx);
 
-  return EmitArrayGEP(resolved_method_dex_cache_addr, method_idx_value,
-                      irb_.getJObjectTy(), kObject);
+  return EmitArrayGEP(resolved_method_dex_cache_addr, method_idx_value, kObject);
 }
 
 
@@ -3611,8 +3622,7 @@
 
   llvm::Value* string_idx_value = irb_.getPtrEquivInt(string_idx);
 
-  return EmitArrayGEP(string_dex_cache_addr, string_idx_value,
-                      irb_.getJObjectTy(), kObject);
+  return EmitArrayGEP(string_dex_cache_addr, string_idx_value, kObject);
 }
 
 
@@ -3865,7 +3875,7 @@
 
   // Save current IR builder insert point
   llvm::IRBuilderBase::InsertPoint irb_ip_original = irb_.saveIP();
-  irb_.SetInsertPoint(basic_block_reg_alloca_);
+  irb_.SetInsertPoint(basic_block_alloca_);
 
   // Alloca
   llvm::Value* reg_addr = irb_.CreateAlloca(reg_type, 0, reg_name);
@@ -3897,7 +3907,7 @@
   // Save current IR builder insert point
   llvm::IRBuilderBase::InsertPoint irb_ip_original = irb_.saveIP();
 
-  irb_.SetInsertPoint(basic_block_shadow_frame_alloca_);
+  irb_.SetInsertPoint(basic_block_shadow_frame_);
 
   llvm::Value* gep_index[] = {
     irb_.getInt32(0), // No pointer displacement
@@ -3926,7 +3936,7 @@
 
   // Save current IR builder insert point
   llvm::IRBuilderBase::InsertPoint irb_ip_original = irb_.saveIP();
-  irb_.SetInsertPoint(basic_block_reg_alloca_);
+  irb_.SetInsertPoint(basic_block_alloca_);
 
   // Alloca
   llvm::Value* reg_addr = irb_.CreateAlloca(reg_type, 0, reg_name);
@@ -3959,6 +3969,79 @@
 
 
 // TODO: Use high-level IR to do this
+bool MethodCompiler::EmitInlineJavaIntrinsic(const std::string& callee_method_name,
+                                             const std::vector<llvm::Value*>& args,
+                                             llvm::BasicBlock* after_invoke) {
+  if (callee_method_name == "char java.lang.String.charAt(int)") {
+    return EmitInlinedStringCharAt(args, after_invoke);
+  }
+  if (callee_method_name == "int java.lang.String.length()") {
+    return EmitInlinedStringLength(args, after_invoke);
+  }
+  return true;
+}
+
+bool MethodCompiler::EmitInlinedStringCharAt(const std::vector<llvm::Value*>& args,
+                                             llvm::BasicBlock* after_invoke) {
+  DCHECK_EQ(args.size(), 3U) <<
+      "char java.lang.String.charAt(int) has 3 args: method, this, char_index";
+  llvm::Value* this_object = args[1];
+  llvm::Value* char_index = args[2];
+  llvm::BasicBlock* block_retry = llvm::BasicBlock::Create(*context_, "CharAtRetry", func_);
+  llvm::BasicBlock* block_cont = llvm::BasicBlock::Create(*context_, "CharAtCont", func_);
+
+  // TODO: Can we safely say the String.count is ConstJObject(constant memory)? (there are so many
+  // iput to String.count in the String.<init>(...))
+  llvm::Value* string_count = irb_.LoadFromObjectOffset(this_object,
+                                                        String::CountOffset().Int32Value(),
+                                                        irb_.getJIntTy(),
+                                                        kTBAAHeapInstance, kInt);
+  // Two's complement, so we can use only one "less than" to check "in bounds"
+  llvm::Value* in_bounds = irb_.CreateICmpULT(char_index, string_count);
+  irb_.CreateCondBr(in_bounds, block_cont, block_retry, kLikely);
+
+  irb_.SetInsertPoint(block_cont);
+  // TODO: Can we safely say the String.offset is ConstJObject(constant memory)?
+  llvm::Value* string_offset = irb_.LoadFromObjectOffset(this_object,
+                                                         String::OffsetOffset().Int32Value(),
+                                                         irb_.getJIntTy(),
+                                                         kTBAAHeapInstance, kInt);
+  llvm::Value* string_value = irb_.LoadFromObjectOffset(this_object,
+                                                        String::ValueOffset().Int32Value(),
+                                                        irb_.getJObjectTy(),
+                                                        kTBAAHeapInstance, kObject);
+
+  // index_value = string.offset + char_index
+  llvm::Value* index_value = irb_.CreateAdd(string_offset, char_index);
+
+  // array_elem_value = string.value[index_value]
+  llvm::Value* array_elem_addr = EmitArrayGEP(string_value, index_value, kChar);
+  llvm::Value* array_elem_value = irb_.CreateLoad(array_elem_addr, kTBAAHeapArray, kChar);
+
+  EmitStoreDalvikRetValReg(kChar, kArray, array_elem_value);
+  irb_.CreateBr(after_invoke);
+
+  irb_.SetInsertPoint(block_retry);
+  return true;
+}
+
+bool MethodCompiler::EmitInlinedStringLength(const std::vector<llvm::Value*>& args,
+                                             llvm::BasicBlock* after_invoke) {
+  DCHECK_EQ(args.size(), 2U) <<
+      "int java.lang.String.length() has 2 args: method, this";
+  llvm::Value* this_object = args[1];
+  // TODO: Can we safely say the String.count is ConstJObject(constant memory)?
+  llvm::Value* string_count = irb_.LoadFromObjectOffset(this_object,
+                                                        String::CountOffset().Int32Value(),
+                                                        irb_.getJIntTy(),
+                                                        kTBAAHeapInstance, kInt);
+  EmitStoreDalvikRetValReg(kInt, kAccurate, string_count);
+  irb_.CreateBr(after_invoke);
+  return false;
+}
+
+
+// TODO: Use high-level IR to do this
 void MethodCompiler::ComputeMethodInfo() {
   // If this method is static, we set the "this" register index to -1. So we don't worry about this
   // method is static or not in the following comparison.
diff --git a/src/compiler_llvm/method_compiler.h b/src/compiler_llvm/method_compiler.h
index 598a337..50411a1 100644
--- a/src/compiler_llvm/method_compiler.h
+++ b/src/compiler_llvm/method_compiler.h
@@ -344,7 +344,6 @@
 
   llvm::Value* EmitArrayGEP(llvm::Value* array_addr,
                             llvm::Value* index_value,
-                            llvm::Type* elem_type,
                             JType elem_jty);
 
   llvm::Value* EmitLoadConstantClass(uint32_t dex_pc, uint32_t type_idx);
@@ -435,6 +434,15 @@
   }
 
   // TODO: Use high-level IR to do this
+  bool EmitInlineJavaIntrinsic(const std::string& callee_method_name,
+                               const std::vector<llvm::Value*>& args,
+                               llvm::BasicBlock* after_invoke);
+
+  bool EmitInlinedStringCharAt(const std::vector<llvm::Value*>& args,
+                               llvm::BasicBlock* after_invoke);
+
+  bool EmitInlinedStringLength(const std::vector<llvm::Value*>& args,
+                               llvm::BasicBlock* after_invoke);
 
   struct MethodInfo {
     int64_t this_reg_idx;
@@ -473,8 +481,8 @@
   UniquePtr<DalvikReg> retval_reg_;
 
   llvm::BasicBlock* basic_block_stack_overflow_;
-  llvm::BasicBlock* basic_block_reg_alloca_;
-  llvm::BasicBlock* basic_block_shadow_frame_alloca_;
+  llvm::BasicBlock* basic_block_alloca_;
+  llvm::BasicBlock* basic_block_shadow_frame_;
   llvm::BasicBlock* basic_block_reg_arg_init_;
   std::vector<llvm::BasicBlock*> basic_blocks_;
 
@@ -483,6 +491,7 @@
   llvm::BasicBlock* basic_block_unreachable_;
 
   llvm::AllocaInst* shadow_frame_;
+  llvm::AllocaInst* jvalue_temp_;
 
   uint16_t elf_func_idx_;
 };
diff --git a/src/dex2oat.cc b/src/dex2oat.cc
index 42a3266..0d3fa58 100644
--- a/src/dex2oat.cc
+++ b/src/dex2oat.cc
@@ -561,12 +561,6 @@
   thread_count = 1;
 #endif
 
-#if defined(ART_USE_LLVM_COMPILER) && defined(ART_TARGET)
-  // To avoid high memory usage, always run dex2oat in single thread mode when
-  // we are using LLVM-based compiler.
-  thread_count = 1;
-#endif
-
   if (oat_filename.empty() && oat_fd == -1) {
     Usage("Output must be supplied with either --oat-file or --oat-fd");
   }
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index 605024e..a28a898 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -67,6 +67,8 @@
 extern "C" void art_unlock_object_from_code(void*);
 
 // Math entrypoints.
+extern "C" double art_fmod_from_code(double, double);
+extern "C" float art_fmodf_from_code(float, float);
 extern "C" double art_l2d_from_code(int64_t);
 extern "C" float art_l2f_from_code(int64_t);
 extern "C" int64_t art_d2l_from_code(double);
@@ -74,7 +76,7 @@
 extern "C" int32_t art_idivmod_from_code(int32_t, int32_t);
 extern "C" int64_t art_ldiv_from_code(int64_t, int64_t);
 extern "C" int64_t art_ldivmod_from_code(int64_t, int64_t);
-extern "C" int64_t art_lmul_from_code(int64_t a, int64_t b);
+extern "C" int64_t art_lmul_from_code(int64_t, int64_t);
 extern "C" uint64_t art_lshl_from_code(uint64_t, uint32_t);
 extern "C" uint64_t art_lshr_from_code(uint64_t, uint32_t);
 extern "C" uint64_t art_lushr_from_code(uint64_t, uint32_t);
@@ -168,13 +170,13 @@
   //points->pDmul = NULL; // Not needed on x86.
   //points->pDsub = NULL; // Not needed on x86.
   //points->pF2d = NULL;
-  //points->pFmod = NULL;
+  points->pFmod = art_fmod_from_code;
   //points->pI2d = NULL;
   points->pL2d = art_l2d_from_code;
   //points->pD2f = NULL;
   //points->pFadd = NULL; // Not needed on x86.
   //points->pFdiv = NULL; // Not needed on x86.
-  //points->pFmodf = NULL;
+  points->pFmodf = art_fmodf_from_code;
   //points->pFmul = NULL; // Not needed on x86.
   //points->pFsub = NULL; // Not needed on x86.
   //points->pI2f = NULL;
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 028d7ec..74ae8fcc 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -93,7 +93,7 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
      */
-MACRO0(SETUP_REF_AND_ARG_CALLEE_SAVE_FRAME)
+MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     pushl %edi  // Save callee saves
     pushl %esi
     pushl %ebp
@@ -103,7 +103,7 @@
     pushl %eax  // Align stack, eax will be clobbered by Method*
 END_MACRO
 
-MACRO0(RESTORE_REF_AND_ARG_CALLEE_SAVE_FRAME)
+MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(4), %esp  // Remove padding
     popl %ecx  // Restore args except eax
     popl %edx
@@ -124,8 +124,8 @@
     subl  MACRO_LITERAL(8), %esp             // Alignment padding
     pushl %ecx                               // pass SP
     pushl %fs:THREAD_SELF_OFFSET             // pass Thread::Current()
-    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverExceptionFromCode(Thread*, SP)
-    int3
+    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*, SP)
+    int3                                     // unreached
 END_MACRO
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
@@ -261,6 +261,7 @@
     // Tail call to intended method.
     ret
 1:
+    addl MACRO_LITERAL(4), %esp   // Pop code pointer off stack
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
@@ -395,12 +396,34 @@
 
 NO_ARG_DOWNCALL art_test_suspend, artTestSuspendFromCode, ret
 
+DEFINE_FUNCTION art_fmod_from_code
+    movl  %ebx, -4(%esp)          // put hi arg2 into memory
+    movl  %edx, -8(%esp)          // put lo arg2 into memory
+    fldl  -8(%esp)                // push arg2 onto fp stack
+    movl  %ecx, -4(%esp)          // put hi arg1 into memory
+    movl  %eax, -8(%esp)          // put lo arg1 into memory
+    fldl  -8(%esp)                // push arg1 onto fp stack
+    fprem1                        // calculate IEEE remainder
+    fstpl -8(%esp)                // pop return value off fp stack
+    movsd -8(%esp), %xmm0         // place into %xmm0
+    ret
+
+DEFINE_FUNCTION art_fmodf_from_code
+    movl  %ecx, -4(%esp)          // put arg2 into memory
+    fld   -4(%esp)                // push arg2 onto fp stack
+    movl  %eax, -4(%esp)          // put arg1 into memory
+    fld   -4(%esp)                // push arg1 onto fp stack
+    fprem1                        // calculate IEEE remainder
+    fstp  -4(%esp)                // pop return value off fp stack
+    movss -4(%esp), %xmm0         // place into %xmm0
+    ret
+
 DEFINE_FUNCTION art_l2d_from_code
     pushl %eax                    // alignment padding
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
-    call SYMBOL(art_l2d) // (jlong a, Thread*, SP)
-    fstpl (%esp)                  // get return value
+    call SYMBOL(art_l2d)          // (jlong a, Thread*, SP)
+    fstpl (%esp)                  // pop return value off fp stack
     movsd (%esp), %xmm0           // place into %xmm0
     addl LITERAL(12), %esp        // pop arguments
     ret
@@ -409,8 +432,8 @@
     pushl %eax                    // alignment padding
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
-    call SYMBOL(art_l2f) // (jlong a, Thread*, SP)
-    fstp  (%esp)                  // get return value
+    call SYMBOL(art_l2f)          // (jlong a, Thread*, SP)
+    fstp  (%esp)                  // pop return value off fp stack
     movss (%esp), %xmm0           // place into %xmm0
     addl LITERAL(12), %esp        // pop arguments
     ret
@@ -419,14 +442,14 @@
     pushl %eax                    // alignment padding
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
-    call SYMBOL(art_d2l) // (jdouble a, Thread*, SP)
+    call SYMBOL(art_d2l)          // (jdouble a, Thread*, SP)
     addl LITERAL(12), %esp        // pop arguments
     ret
 
 DEFINE_FUNCTION art_f2l_from_code
     subl LITERAL(8), %esp         // alignment padding
     pushl %eax                    // pass arg1
-    call SYMBOL(art_f2l) // (jfloat a, Thread*, SP)
+    call SYMBOL(art_f2l)          // (jfloat a, Thread*, SP)
     addl LITERAL(12), %esp        // pop arguments
     ret
 
@@ -509,6 +532,183 @@
 1:
     ret
 
+DEFINE_FUNCTION art_set32_instance_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    subl LITERAL(8), %esp         // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    mov 32(%ebx), %ebx            // get referrer
+    pushl %ebx                    // pass referrer
+    pushl %edx                    // pass new_val
+    pushl %ecx                    // pass object
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artSet32InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_IF_EAX_ZERO            // return or deliver exception
+
+DEFINE_FUNCTION art_set64_instance_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    subl LITERAL(8), %esp         // alignment padding
+    pushl %esp                    // pass SP-8
+    addl LITERAL(8), (%esp)       // fix SP on stack by adding 8
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %ebx                    // pass high half of new_val
+    pushl %edx                    // pass low half of new_val
+    pushl %ecx                    // pass object
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_IF_EAX_ZERO            // return or deliver exception
+
+DEFINE_FUNCTION art_set_obj_instance_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    subl LITERAL(8), %esp         // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    mov 32(%ebx), %ebx            // get referrer
+    pushl %ebx                    // pass referrer
+    pushl %edx                    // pass new_val
+    pushl %ecx                    // pass object
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artSetObjInstanceFromCode) // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_IF_EAX_ZERO            // return or deliver exception
+
+DEFINE_FUNCTION art_get32_instance_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    mov 32(%esp), %edx            // get referrer
+    subl LITERAL(12), %esp        // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %edx                    // pass referrer
+    pushl %ecx                    // pass object
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artGet32InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+
+DEFINE_FUNCTION art_get64_instance_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    mov 32(%esp), %edx            // get referrer
+    subl LITERAL(12), %esp        // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %edx                    // pass referrer
+    pushl %ecx                    // pass object
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artGet64InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+
+DEFINE_FUNCTION art_get_obj_instance_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    mov 32(%esp), %edx            // get referrer
+    subl LITERAL(12), %esp        // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %edx                    // pass referrer
+    pushl %ecx                    // pass object
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artGetObjInstanceFromCode) // (field_idx, Object*, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+
+DEFINE_FUNCTION art_set32_static_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    mov 32(%esp), %edx            // get referrer
+    subl LITERAL(12), %esp        // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %edx                    // pass referrer
+    pushl %ecx                    // pass new_val
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artSet32StaticFromCode)    // (field_idx, new_val, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_IF_EAX_ZERO            // return or deliver exception
+
+DEFINE_FUNCTION art_set64_static_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    subl LITERAL(8), %esp         // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    mov 32(%ebx), %ebx            // get referrer
+    pushl %edx                    // pass high half of new_val
+    pushl %ecx                    // pass low half of new_val
+    pushl %ebx                    // pass referrer
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artSet64StaticFromCode)    // (field_idx, referrer, new_val, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_IF_EAX_ZERO            // return or deliver exception
+
+DEFINE_FUNCTION art_set_obj_static_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %ebx                // remember SP
+    mov 32(%esp), %edx            // get referrer
+    subl LITERAL(12), %esp        // alignment padding
+    pushl %ebx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %edx                    // pass referrer
+    pushl %ecx                    // pass new_val
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artSetObjStaticFromCode)   // (field_idx, new_val, referrer, Thread*, SP)
+    addl LITERAL(32), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_IF_EAX_ZERO            // return or deliver exception
+
+DEFINE_FUNCTION art_get32_static_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    mov 32(%esp), %ecx            // get referrer
+    pushl %edx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %ecx                    // pass referrer
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artGet32StaticFromCode)    // (field_idx, referrer, Thread*, SP)
+    addl LITERAL(16), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+
+DEFINE_FUNCTION art_get64_static_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    mov 32(%esp), %ecx            // get referrer
+    pushl %edx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %ecx                    // pass referrer
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artGet64StaticFromCode)    // (field_idx, referrer, Thread*, SP)
+    addl LITERAL(16), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+
+DEFINE_FUNCTION art_get_obj_static_from_code
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    mov 32(%esp), %ecx            // get referrer
+    pushl %edx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    pushl %ecx                    // pass referrer
+    pushl %eax                    // pass field_idx
+    call SYMBOL(artGetObjStaticFromCode)   // (field_idx, referrer, Thread*, SP)
+    addl LITERAL(16), %esp        // pop arguments
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
+    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
+
 MACRO1(UNIMPLEMENTED,name)
     .globl VAR(name, 0)
     ALIGN_FUNCTION_ENTRY
@@ -519,18 +719,6 @@
     // TODO: implement these!
 UNIMPLEMENTED art_proxy_invoke_handler
 UNIMPLEMENTED art_update_debugger
-UNIMPLEMENTED art_set32_instance_from_code
-UNIMPLEMENTED art_set64_instance_from_code
-UNIMPLEMENTED art_set_obj_instance_from_code
-UNIMPLEMENTED art_get32_instance_from_code
-UNIMPLEMENTED art_get64_instance_from_code
-UNIMPLEMENTED art_get_obj_instance_from_code
-UNIMPLEMENTED art_set32_static_from_code
-UNIMPLEMENTED art_set64_static_from_code
-UNIMPLEMENTED art_set_obj_static_from_code
-UNIMPLEMENTED art_get32_static_from_code
-UNIMPLEMENTED art_get64_static_from_code
-UNIMPLEMENTED art_get_obj_static_from_code
 UNIMPLEMENTED art_indexof
 UNIMPLEMENTED art_memcmp16
 UNIMPLEMENTED art_string_compareto