diff --git a/build/Android.common.mk b/build/Android.common.mk
index df4c5a9..20a73b5 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -280,6 +280,7 @@
 	MyClass \
 	MyClassNatives \
 	Nested \
+	NonStaticLeafMethods \
 	ProtoCompare \
 	ProtoCompare2 \
 	StaticLeafMethods \
diff --git a/src/disassembler_arm.cc b/src/disassembler_arm.cc
index b2ddc2c..bbdc37e 100644
--- a/src/disassembler_arm.cc
+++ b/src/disassembler_arm.cc
@@ -42,7 +42,7 @@
   }
 }
 
-static const char* ConditionCodeNames[] = {
+static const char* kConditionCodeNames[] = {
     "EQ",  // 0000 - equal
     "NE",  // 0001 - not-equal
     "CS",  // 0010 - carry-set, greater than, equal or unordered
@@ -57,12 +57,12 @@
     "LT",  // 1011 - signed less than
     "GT",  // 1100 - signed greater than
     "LE",  // 1101 - signed less than or equal
-    "AL",  // 1110 - always
+    "",    // 1110 - always
 };
 
 void DisassemblerArm::DumpCond(std::ostream& os, uint32_t cond) {
   if (cond < 15) {
-    os << ConditionCodeNames[cond];
+    os << kConditionCodeNames[cond];
   } else {
     os << "Unexpected condition: " << cond;
   }
@@ -73,30 +73,10 @@
     case 13: os << "SP"; break;
     case 14: os << "LR"; break;
     case 15: os << "PC"; break;
-    default: os << "r" << reg; break;
+    default: os << "R" << reg; break;
   }
 }
 
-void DisassemblerArm::DumpRegList(std::ostream& os, uint32_t reg_list) {
-  if (reg_list == 0) {
-    os << "<no register list?>";
-    return;
-  }
-  bool first = true;
-  for (size_t i = 0; i < 16; i++) {
-    if ((reg_list & (1 << i)) != 0) {
-      if (first) {
-        os << "{";
-        first = false;
-      } else {
-        os << ", ";
-      }
-      DumpReg(os, i);
-    }
-  }
-  os << "}";
-}
-
 void DisassemblerArm::DumpBranchTarget(std::ostream& os, const uint8_t* instr_ptr, int32_t imm32) {
   os << imm32 << " (" << reinterpret_cast<const void*>(instr_ptr + imm32) << ")";
 }
@@ -109,9 +89,155 @@
   return ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24);
 }
 
+static const char* kDataProcessingOperations[] = {  // Mnemonic table; DumpArm indexes it with instruction bits 24:21.
+  "AND", "EOR", "SUB", "RSB", "ADD", "ADC", "SBC", "RSC",  // entries 0-7
+  "TST", "TEQ", "CMP", "CMN", "ORR", "MOV", "BIC", "MVN",  // entries 8-15
+};
+
+struct ArmRegister {  // Wraps a register number so it can be streamed by name; construction CHECKs that it is <= 15.
+  ArmRegister(uint32_t r) : r(r) { CHECK_LE(r, 15U); }
+  uint32_t r;  // register number
+};
+std::ostream& operator<<(std::ostream& os, const ArmRegister& r) {
+  // Streams the register's assembler name.
+  // 13, 14 and 15 print as SP, LR and PC;
+  // every other register number prints as "R" followed by the number.
+  switch (r.r) {
+    case 13: os << "SP"; break;
+    case 14: os << "LR"; break;
+    case 15: os << "PC"; break;
+    default: os << "R" << r.r; break;
+  }
+  return os;
+}
+
+struct Rd : ArmRegister {  // Register number taken from instruction bits 15:12.
+  Rd(uint32_t instruction) : ArmRegister((instruction >> 12) & 0xf) {}
+};
+typedef Rd Rt;  // Rt is read from the same bit field as Rd.
+struct Rn : ArmRegister {  // Register number taken from instruction bits 19:16.
+  Rn(uint32_t instruction) : ArmRegister((instruction >> 16) & 0xf) {}
+};
+
+struct Rm {  // Register operand: register number from bits 3:0 plus the raw 8-bit field from bits 11:4.
+  Rm(uint32_t instruction) : shift((instruction >> 4) & 0xff), rm(instruction & 0xf) {}
+  uint32_t shift;  // bits 11:4, kept undecoded
+  ArmRegister rm;
+};
+std::ostream& operator<<(std::ostream& os, const Rm& r) {
+  // Prints the register name; a non-zero shift field is appended raw after "-shift-".
+  if (r.shift == 0) {
+    return os << r.rm;
+  }
+  return os << r.rm << "-shift-" << r.shift;  // TODO
+}
+
+struct Imm12 {  // 12-bit immediate field split into a 4-bit rotate (bits 11:8) and an 8-bit value (bits 7:0).
+  Imm12(uint32_t instruction) : rotate((instruction >> 8) & 0xf), imm(instruction & 0xff) {}
+  uint32_t rotate;  // the printed value is imm rotated right by 2 * rotate
+  uint32_t imm;
+};
+std::ostream& operator<<(std::ostream& os, const Imm12& rhs) {
+  // Prints "#" and imm rotated right by 2 * rotate; "& 31" keeps the left shift defined when rotate == 0.
+  uint32_t imm = (rhs.imm >> (2 * rhs.rotate)) | (rhs.imm << ((32 - (2 * rhs.rotate)) & 31));
+  return os << "#" << imm;
+}
+
+struct RegisterList {  // Low 16 bits of the instruction, one bit per register.
+  RegisterList(uint32_t instruction) : register_list(instruction & 0xffff) {}
+  uint32_t register_list;  // bit i set => register i is in the list
+};
+std::ostream& operator<<(std::ostream& os, const RegisterList& rhs) {
+  // Prints the selected registers as "{R0, R1, ...}" in ascending register
+  // order. An empty list cannot be written that way, so a placeholder
+  // string is printed instead.
+  if (rhs.register_list == 0) {
+    return os << "<no register list?>";
+  }
+  // The first register is prefixed by the opening brace, every later one
+  // by a comma.
+  const char* separator = "{";
+  for (uint32_t reg = 0; reg < 16; ++reg) {
+    const bool selected = ((rhs.register_list >> reg) & 1) != 0;
+    if (!selected) {
+      continue;
+    }
+    os << separator << ArmRegister(reg);
+    separator = ", ";
+  }
+  return os << "}";
+}
 
 void DisassemblerArm::DumpArm(std::ostream& os, const uint8_t* instr_ptr) {
-  os << StringPrintf("\t\t\t%p: %08x\n", instr_ptr, ReadU32(instr_ptr));
+  // Writes one line for the 32-bit ARM instruction at instr_ptr: its address and raw encoding,
+  // followed by the decoded text, or "???" when the encoding is not decoded here.
+  uint32_t instruction = ReadU32(instr_ptr);
+  uint32_t cond = (instruction >> 28) & 0xf;
+  uint32_t op1 = (instruction >> 25) & 0x7;
+  os << StringPrintf("\t\t\t%p: %08x: ", instr_ptr, instruction);
+  if (cond == 0xf) {
+    // kConditionCodeNames stops at 1110, so 1111 must never index it. Those encodings are also
+    // not the conditional instructions decoded below, so they are reported as unknown.
+    os << "???\n";
+    return;
+  }
+  switch (op1) {
+    case 0:
+    case 1: {  // Data processing instructions.
+      if ((instruction & 0x0fffffd0) == 0x012fff10) {  // BX and BLX (register)
+        os << (((instruction >> 5) & 1) ? "BLX" : "BX") << " " << ArmRegister(instruction & 0xf);
+        break;
+      }
+      bool i = (instruction & (1 << 25)) != 0;
+      bool s = (instruction & (1 << 20)) != 0;
+      os << kDataProcessingOperations[(instruction >> 21) & 0xf]
+         << kConditionCodeNames[cond]
+         << (s ? "S" : "")
+         << " "
+         << Rd(instruction) << ", ";
+      if (i) {
+        os << Rn(instruction) << ", " << Imm12(instruction);
+      } else {
+        os << Rm(instruction);
+      }
+      break;
+    }
+    case 2: {  // Load/store word and unsigned byte.
+      bool p = (instruction & (1 << 24)) != 0;
+      bool u = (instruction & (1 << 23)) != 0;
+      bool b = (instruction & (1 << 22)) != 0;
+      bool w = (instruction & (1 << 21)) != 0;
+      bool l = (instruction & (1 << 20)) != 0;
+      os << (l ? "LDR" : "STR") << (b ? "B" : "") << kConditionCodeNames[cond] << " ";
+      os << Rt(instruction) << ", ";
+      // In this form bits 11:0 are a plain 12-bit offset rather than a rotated 8-bit value,
+      // and the U bit selects whether it is added to or subtracted from the base register.
+      const char* sign = u ? "#" : "#-";
+      uint32_t offset = instruction & 0xfff;
+      if (Rn(instruction).r == 0xf) {
+        UNIMPLEMENTED(FATAL) << "literals";
+      } else if (!p) {  // Post-indexed.
+        os << "[" << Rn(instruction) << "], " << sign << offset;
+      } else {  // Offset addressing, or pre-indexed with write-back when W is set.
+        os << "[" << Rn(instruction) << ", " << sign << offset << "]" << (w ? "!" : "");
+      }
+      break;
+    }
+    case 4: {  // Load/store multiple.
+      bool p = (instruction & (1 << 24)) != 0;
+      bool u = (instruction & (1 << 23)) != 0;
+      bool w = (instruction & (1 << 21)) != 0;
+      bool l = (instruction & (1 << 20)) != 0;
+      os << StringPrintf("%s%c%c%s ", l ? "LDM" : "STM", u ? 'I' : 'D', p ? 'B' : 'A',
+                         kConditionCodeNames[cond]);
+      os << Rn(instruction) << (w ? "!" : "") << ", " << RegisterList(instruction);
+      break;
+    }
+    default:
+      os << "???";
+      break;
+  }
+  os << '\n';
 }
 
 size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) {
@@ -124,11 +250,14 @@
   // |---|---|-------|----|----------------|
   // |111|op1| op2   |    |                |
   uint32_t op1 = (instr >> 27) & 3;
+  if (op1 == 0) {
+    return DumpThumb16(os, instr_ptr);
+  }
+
   uint32_t op2 = (instr >> 20) & 0x7F;
-  os << StringPrintf("\t\t\t%p: ", instr_ptr);
+  os << StringPrintf("\t\t\t%p: %08x: ", instr_ptr, instr);
   switch (op1) {
     case 0:
-      return DumpThumb16(os, instr_ptr);
       break;
     case 1:
       switch (op2) {
@@ -150,7 +279,6 @@
           uint32_t W = (instr >> 21) & 1;
           uint32_t L = (instr >> 20) & 1;
           uint32_t Rn = (instr >> 16) & 0xF;
-          uint32_t reg_list = instr & 0xFFFF;
           if (op == 1 || op == 2) {
             if (op == 1) {
               if (L == 0) {
@@ -197,8 +325,7 @@
                 }
               }
             }
-            DumpRegList(os, reg_list);
-            os << "  // ";
+            os << RegisterList(instr);
           }
           break;
         }
@@ -247,7 +374,7 @@
         DumpReg(os, Rd);
         os << ", ";
         DumpReg(os, Rn);
-        os << ", ThumbExpand(" << imm32 << ")  // ";
+        os << ", ThumbExpand(" << imm32 << ")";
       } else if ((instr & 0x8000) == 0 && (op2 & 0x20) != 0) {
         // Data-processing (plain binary immediate)
         // |111|11|10|00000|0000|1|111110000000000|
@@ -269,7 +396,7 @@
             uint32_t imm16 = (Rn << 12) | (i << 11) | (imm3 << 8) | imm8;
             os << "MOVW ";
             DumpReg(os, Rd);
-            os << ", #" << imm16 << "  // ";
+            os << ", #" << imm16;
             break;
           }
           case 0x0A: {
@@ -283,7 +410,7 @@
             DumpReg(os, Rd);
             os << ", ";
             DumpReg(os, Rn);
-            os << ", #" << imm12 << "  // ";
+            os << ", #" << imm12;
             break;
           }
           default:
@@ -324,7 +451,6 @@
               DumpCond(os, cond);
               os << ".W ";
               DumpBranchTarget(os, instr_ptr + 4, imm32);
-              os << "  // ";
             }
             break;
           case 2:
@@ -390,7 +516,6 @@
                 if (Rn == 13 && P == 1 && U == 0 && W == 1) {
                   os << "PUSH ";
                   DumpReg(os, Rt);
-                  os << "  // ";
                 } else if (Rn == 15 || (P == 0 && W == 0)) {
                   os << "UNDEFINED ";
                 } else {
@@ -410,7 +535,6 @@
                       os << "!";
                     }
                   }
-                  os << "  // ";
                 }
               } else if (op3 == 6) {
                 uint32_t imm12 = instr & 0xFFF;
@@ -418,7 +542,7 @@
                 DumpReg(os, Rt);
                 os << ", [";
                 DumpReg(os, Rn);
-                os << ", #" << imm12 << "]  // ";
+                os << ", #" << imm12 << "]";
               }
               break;
             }
@@ -448,21 +572,21 @@
             DumpReg(os, Rt);
             os << ", [";
             DumpReg(os, Rn);
-            os << ", #" << imm12 << "]  // ";
+            os << ", #" << imm12 << "]";
           } else if (op4 == 0) {
             // LDR.W Rt, [Rn, Rm{, LSL #imm2}] - 111 11 00 00 101 nnnn tttt 000000iimmmm
             uint32_t imm2 = (instr >> 4) & 0xF;
-            uint32_t Rm = instr & 0xF;
+            uint32_t rm = instr & 0xF;
             os << "LDR.W ";
             DumpReg(os, Rt);
             os << ", [";
             DumpReg(os, Rn);
             os << ", ";
-            DumpReg(os, Rm);
+            DumpReg(os, rm);
             if (imm2 != 0) {
               os << ", LSL #" << imm2;
             }
-            os << "]  // ";
+            os << "]";
           } else {
             // LDRT Rt, [Rn, #imm8]            - 111 11 00 00 101 nnnn tttt 1110iiiiiiii
             uint32_t imm8 = instr & 0xFF;
@@ -470,7 +594,7 @@
             DumpReg(os, Rt);
             os << ", [";
             DumpReg(os, Rn);
-            os << ", #" << imm8 << "]  // ";
+            os << ", #" << imm8 << "]";
           }
           break;
         }
@@ -478,7 +602,7 @@
     default:
       break;
   }
-  os << StringPrintf("%08x\n", instr);
+  os << '\n';
   return 4;
 }
 
@@ -488,7 +612,7 @@
   if (is_32bit) {
     return DumpThumb32(os, instr_ptr);
   } else {
-    os << StringPrintf("\t\t\t%p: ", instr_ptr);
+    os << StringPrintf("\t\t\t%p: %04x    : ", instr_ptr, instr);
     uint16_t opcode1 = instr >> 10;
     if (opcode1 < 0x10) {
       // shift (immediate), add, subtract, move, and compare
@@ -500,7 +624,7 @@
           // Logical shift right    - 00 001xx xxxxxxxxx
           // Arithmetic shift right - 00 010xx xxxxxxxxx
           uint16_t imm5 = (instr >> 6) & 0x1F;
-          uint16_t Rm = (instr >> 3) & 7;
+          uint16_t rm = (instr >> 3) & 7;
           uint16_t Rd = instr & 7;
           if (opcode2 <= 3) {
             os << "LSLS ";
@@ -511,8 +635,8 @@
           }
           DumpReg(os, Rd);
           os << ", ";
-          DumpReg(os, Rm);
-          os << ", #" << imm5 << "  // ";
+          DumpReg(os, rm);
+          os << ", #" << imm5;
           break;
         }
         case 0xC: case 0xD: case 0xE: case 0xF: {
@@ -541,7 +665,6 @@
           } else if (imm3_or_Rm != 0) {
             os << ", #" << imm3_or_Rm;
           }
-          os << "  // ";
           break;
         }
         case 0x10: case 0x11: case 0x12: case 0x13:
@@ -561,7 +684,7 @@
             case 7: os << "SUBS "; break;
           }
           DumpReg(os, Rn);
-          os << ", #" << imm8 << "  // ";
+          os << ", #" << imm8;
           break;
         }
         default:
@@ -575,54 +698,50 @@
           // Add low registers  - 010001 0000 xxxxxx
           // Add high registers - 010001 0001/001x xxxxxx
           uint16_t DN = (instr >> 7) & 1;
-          uint16_t Rm = (instr >> 3) & 0xF;
+          uint16_t rm = (instr >> 3) & 0xF;
           uint16_t Rdn = instr & 7;
           uint16_t DN_Rdn = (DN << 3) | Rdn;
           os << "ADD ";
           DumpReg(os, DN_Rdn);
           os << ", ";
-          DumpReg(os, Rm);
-          os << "  // ";
+          DumpReg(os, rm);
           break;
         }
         case 0x8: case 0x9: case 0xA: case 0xB: {
           // Move low registers  - 010001 1000 xxxxxx
           // Move high registers - 010001 1001/101x xxxxxx
           uint16_t DN = (instr >> 7) & 1;
-          uint16_t Rm = (instr >> 3) & 0xF;
+          uint16_t rm = (instr >> 3) & 0xF;
           uint16_t Rdn = instr & 7;
           uint16_t DN_Rdn = (DN << 3) | Rdn;
           os << "MOV ";
           DumpReg(os, DN_Rdn);
           os << ", ";
-          DumpReg(os, Rm);
-          os << "  // ";
+          DumpReg(os, rm);
           break;
         }
         case 0x5: case 0x6: case 0x7: {
           // Compare high registers - 010001 0101/011x xxxxxx
           uint16_t N = (instr >> 7) & 1;
-          uint16_t Rm = (instr >> 3) & 0xF;
+          uint16_t rm = (instr >> 3) & 0xF;
           uint16_t Rn = instr & 7;
           uint16_t N_Rn = (N << 3) | Rn;
           os << "CMP ";
           DumpReg(os, N_Rn);
           os << ", ";
-          DumpReg(os, Rm);
-          os << "  // ";
+          DumpReg(os, rm);
           break;
         }
         case 0xC: case 0xD: case 0xE: case 0xF: {
           // Branch and exchange           - 010001 110x xxxxxx
           // Branch with link and exchange - 010001 111x xxxxxx
-          uint16_t Rm = instr >> 3 & 0xF;
+          uint16_t rm = instr >> 3 & 0xF;
           if ((opcode2 & 0x2) == 0) {
             os << "BX ";
           } else {
             os << "BLX ";
           }
-          DumpReg(os, Rm);
-          os << "  // ";
+          DumpReg(os, rm);
           break;
         }
         default:
@@ -641,7 +760,7 @@
           } else {
             os << "SUB SP, SP, #";
           }
-          os << (imm7 << 2) << "  // ";
+          os << (imm7 << 2);
           break;
         }
         case 0x78: case 0x79: case 0x7A: case 0x7B:  // 1111xxx
@@ -660,7 +779,6 @@
           } else {
             os << "IT " << reinterpret_cast<void*>(opB) << " ";
             DumpCond(os, opA);
-            os << "  // ";
           }
           break;
         }
@@ -687,7 +805,7 @@
           DumpReg(os, Rt);
           os << ", [";
           DumpReg(os, Rn);
-          os << ", #" << (imm5 << 2) << "]  // ";
+          os << ", #" << (imm5 << 2) << "]";
           break;
         }
         case 0x9: {
@@ -701,7 +819,7 @@
             os << "LDR ";
           }
           DumpReg(os, Rt);
-          os << ", [SP, #" << (imm8 << 2) << "]  // ";
+          os << ", [SP, #" << (imm8 << 2) << "]";
           break;
         }
         default:
@@ -713,9 +831,8 @@
       imm32 = (imm32 << 20) >> 20;  // sign extend 12 bit immediate
       os << "B ";
       DumpBranchTarget(os, instr_ptr + 4, imm32);
-      os << "  // ";
     }
-    os << StringPrintf("%04x\n", instr);
+    os << '\n';
   }
   return 2;
 }
diff --git a/src/disassembler_arm.h b/src/disassembler_arm.h
index c9e7f32..81b1b8e 100644
--- a/src/disassembler_arm.h
+++ b/src/disassembler_arm.h
@@ -37,7 +37,6 @@
   void DumpBranchTarget(std::ostream& os, const uint8_t* instr_ptr, int32_t imm32);
   void DumpCond(std::ostream& os, uint32_t cond);
   void DumpReg(std::ostream& os, uint32_t reg);
-  void DumpRegList(std::ostream& os, uint32_t reg_list);
 };
 
 }  // namespace arm
diff --git a/src/heap.cc b/src/heap.cc
index b9e41c0..f4f2996 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -790,8 +790,9 @@
   static Method* FinalizerReference_add =
       java_lang_ref_FinalizerReference_->FindDirectMethod("add", "(Ljava/lang/Object;)V");
   DCHECK(FinalizerReference_add != NULL);
-  Object* args[] = { object };
-  FinalizerReference_add->Invoke(self, NULL, reinterpret_cast<byte*>(&args), NULL);
+  JValue args[1];
+  args[0].l = object;
+  FinalizerReference_add->Invoke(self, NULL, args, NULL);
 }
 
 void Heap::EnqueueClearedReferences(Object** cleared) {
@@ -803,8 +804,9 @@
 
     Thread* self = Thread::Current();
     ScopedThreadStateChange tsc(self, Thread::kRunnable);
-    Object* args[] = { *cleared };
-    ReferenceQueue_add->Invoke(self, NULL, reinterpret_cast<byte*>(&args), NULL);
+    JValue args[1];
+    args[0].l = *cleared;
+    ReferenceQueue_add->Invoke(self, NULL, args, NULL);
     *cleared = NULL;
   }
 }
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index c447de8..eb33a5c 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -163,48 +163,40 @@
     MethodHelper mh(method);
     shorty_ = mh.GetShorty();
     shorty_len_ = mh.GetShortyLength();
-    size_t num_bytes = NumArgArrayBytes(shorty_, shorty_len_);
-    if (num_bytes < kSmallArgArraySizeInBytes) {
+    if (shorty_len_ - 1 < kSmallArgArraySize) {
       arg_array_ = small_arg_array_;
     } else {
-      large_arg_array_.reset(new byte[num_bytes]);
+      large_arg_array_.reset(new JValue[shorty_len_ - 1]);
       arg_array_ = large_arg_array_.get();
     }
   }
 
-  byte* get() {
+  JValue* get() {
     return arg_array_;
   }
 
   void BuildArgArray(JNIEnv* public_env, va_list ap) {
     JNIEnvExt* env = reinterpret_cast<JNIEnvExt*>(public_env);
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i) {
+    for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
       switch (shorty_[i]) {
         case 'Z':
         case 'B':
         case 'C':
         case 'S':
         case 'I':
-          *reinterpret_cast<int32_t*>(&arg_array_[offset]) = va_arg(ap, jint);
-          offset += 4;
+          arg_array_[offset].i = va_arg(ap, jint);
           break;
         case 'F':
-          *reinterpret_cast<float*>(&arg_array_[offset]) = va_arg(ap, jdouble);
-          offset += 4;
+          arg_array_[offset].f = va_arg(ap, jdouble);
           break;
-        case 'L': {
-          Object* obj = DecodeObj(env, va_arg(ap, jobject));
-          *reinterpret_cast<Object**>(&arg_array_[offset]) = obj;
-          offset += sizeof(Object*);
+        case 'L':
+          arg_array_[offset].l = DecodeObj(env, va_arg(ap, jobject));
           break;
-        }
         case 'D':
-          *reinterpret_cast<double*>(&arg_array_[offset]) = va_arg(ap, jdouble);
-          offset += 8;
+          arg_array_[offset].d = va_arg(ap, jdouble);
           break;
         case 'J':
-          *reinterpret_cast<int64_t*>(&arg_array_[offset]) = va_arg(ap, jlong);
-          offset += 8;
+          arg_array_[offset].j = va_arg(ap, jlong);
           break;
       }
     }
@@ -212,76 +204,38 @@
 
   void BuildArgArray(JNIEnv* public_env, jvalue* args) {
     JNIEnvExt* env = reinterpret_cast<JNIEnvExt*>(public_env);
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i) {
+    for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {  // shorty_[0] is skipped; offset always equals i - 1.
       switch (shorty_[i]) {
         case 'Z':
         case 'B':
         case 'C':
         case 'S':
         case 'I':
-          *reinterpret_cast<uint32_t*>(&arg_array_[offset]) = args[i - 1].i;
-          offset += 4;
+          arg_array_[offset].i = args[offset].i;
           break;
         case 'F':
-          *reinterpret_cast<float*>(&arg_array_[offset]) = args[i - 1].f;
-          offset += 4;
+          arg_array_[offset].f = args[offset].f;
           break;
-        case 'L': {
-          Object* obj = DecodeObj(env, args[i - 1].l);
-          *reinterpret_cast<Object**>(&arg_array_[offset]) = obj;
-          offset += sizeof(Object*);
+        case 'L':
+          arg_array_[offset].l = DecodeObj(env, args[offset].l);  // object arguments go through DecodeObj before being stored
           break;
-        }
         case 'D':
-          *reinterpret_cast<double*>(&arg_array_[offset]) = args[i - 1].d;
-          offset += 8;
+          arg_array_[offset].d = args[offset].d;
           break;
         case 'J':
-          *reinterpret_cast<uint64_t*>(&arg_array_[offset]) = args[i - 1].j;
-          offset += 8;
+          arg_array_[offset].j = args[offset].j;
           break;
       }
     }
   }
 
-  void BuildArgArray(JValue* args) {
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i) {
-      switch (shorty_[i]) {
-      case 'Z':
-      case 'B':
-      case 'C':
-      case 'S':
-      case 'I':
-        *reinterpret_cast<uint32_t*>(&arg_array_[offset]) = args[i - 1].i;
-        offset += 4;
-        break;
-      case 'F':
-        *reinterpret_cast<float*>(&arg_array_[offset]) = args[i - 1].f;
-        offset += 4;
-        break;
-      case 'L':
-        *reinterpret_cast<Object**>(&arg_array_[offset]) = args[i - 1].l;
-        offset += sizeof(Object*);
-        break;
-      case 'D':
-        *reinterpret_cast<double*>(&arg_array_[offset]) = args[i - 1].d;
-        offset += 8;
-        break;
-      case 'J':
-        *reinterpret_cast<uint64_t*>(&arg_array_[offset]) = args[i - 1].j;
-        offset += 8;
-        break;
-      }
-    }
-  }
-
  private:
-  enum { kSmallArgArraySizeInBytes = 48 };
+  enum { kSmallArgArraySize = 16 };
   const char* shorty_;
   uint32_t shorty_len_;
-  byte* arg_array_;
-  byte small_arg_array_[kSmallArgArraySizeInBytes];
-  UniquePtr<byte[]> large_arg_array_;
+  JValue* arg_array_;
+  JValue small_arg_array_[kSmallArgArraySize];
+  UniquePtr<JValue[]> large_arg_array_;
 };
 
 namespace {
@@ -303,7 +257,7 @@
   return reinterpret_cast<T>(ts.Self()->DecodeJObject(obj));
 }
 
-static JValue InvokeWithArgArray(JNIEnv* public_env, Object* receiver, Method* method, byte* args) {
+static JValue InvokeWithArgArray(JNIEnv* public_env, Object* receiver, Method* method, JValue* args) {
   JNIEnvExt* env = reinterpret_cast<JNIEnvExt*>(public_env);
   JValue result;
   method->Invoke(env->self, receiver, args, &result);
@@ -723,9 +677,7 @@
 }
 
 JValue InvokeWithJValues(Thread* self, Object* receiver, Method* m, JValue* args) {
-  ArgArray arg_array(m);
-  arg_array.BuildArgArray(args);
-  return InvokeWithArgArray(self->GetJniEnv(), receiver, m, arg_array.get());
+  return InvokeWithArgArray(self->GetJniEnv(), receiver, m, args);  // args is forwarded as-is, without repacking
 }
 
 class JNI {
diff --git a/src/jni_internal_arm.cc b/src/jni_internal_arm.cc
index 6197f15..476c623 100644
--- a/src/jni_internal_arm.cc
+++ b/src/jni_internal_arm.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "jni_internal.h"
+#include <stdint.h>
 
 #include <algorithm>
 
@@ -83,34 +83,51 @@
   __ LoadImmediate(IP, 0, AL);
   __ StoreToOffset(kStoreWord, IP, SP, 0);
 
-  // Copy values by stack
-  for (size_t off = 0; off < stack_bytes; off += kPointerSize) {
-    // we're displaced off of r3 by bytes that'll go in registers
-    int r3_offset = reg_bytes + off;
-    __ LoadFromOffset(kLoadWord, IP, R3, r3_offset);
+  // Copy values onto the stack.
+  size_t src_offset = 0;
+  size_t dst_offset = (is_static ? 1 : 2) * kPointerSize;
+  for (size_t i = 1; i < shorty_len; ++i) {
+    switch (shorty[i]) {
+      case 'D':
+      case 'J':
+        // Move both pointers 64 bits.
+        __ LoadFromOffset(kLoadWord, IP, R3, src_offset);
+        src_offset += kPointerSize;
+        __ StoreToOffset(kStoreWord, IP, SP, dst_offset);
+        dst_offset += kPointerSize;
 
-    // we're displaced off of the arguments by the spill space for the incoming
-    // arguments, the Method* and possibly the receiver
-    int sp_offset = reg_bytes + (is_static ? 1 : 2) * kPointerSize + off;
-    __ StoreToOffset(kStoreWord, IP, SP, sp_offset);
+        __ LoadFromOffset(kLoadWord, IP, R3, src_offset);
+        src_offset += kPointerSize;
+        __ StoreToOffset(kStoreWord, IP, SP, dst_offset);
+        dst_offset += kPointerSize;
+        break;
+      default:
+        // Move the source pointer sizeof(JValue) and the destination pointer 32 bits.
+        __ LoadFromOffset(kLoadWord, IP, R3, src_offset);
+        src_offset += sizeof(JValue);
+        __ StoreToOffset(kStoreWord, IP, SP, dst_offset);
+        dst_offset += kPointerSize;
+        break;
+    }
   }
 
   // Move all the register arguments into place.
+  dst_offset = (is_static ? 1 : 2) * kPointerSize;
   if (is_static) {
-    if (reg_bytes > 0) {
-      __ LoadFromOffset(kLoadWord, R1, R3, 0);
-      if (reg_bytes > 4) {
-        __ LoadFromOffset(kLoadWord, R2, R3, 4);
-        if (reg_bytes > 8) {
-          __ LoadFromOffset(kLoadWord, R3, R3, 8);
+    if (reg_bytes > 0 && num_arg_array_bytes > 0) {
+      __ LoadFromOffset(kLoadWord, R1, SP, dst_offset + 0);
+      if (reg_bytes > 4 && num_arg_array_bytes > 4) {
+        __ LoadFromOffset(kLoadWord, R2, SP, dst_offset + 4);
+        if (reg_bytes > 8 && num_arg_array_bytes > 8) {
+          __ LoadFromOffset(kLoadWord, R3, SP, dst_offset + 8);
         }
       }
     }
   } else {
-    if (reg_bytes > 0) {
-      __ LoadFromOffset(kLoadWord, R2, R3, 0);
-      if (reg_bytes > 4) {
-        __ LoadFromOffset(kLoadWord, R3, R3, 4);
+    if (reg_bytes > 0 && num_arg_array_bytes > 0) {
+      __ LoadFromOffset(kLoadWord, R2, SP, dst_offset + 0);
+      if (reg_bytes > 4 && num_arg_array_bytes > 4) {
+        __ LoadFromOffset(kLoadWord, R3, SP, dst_offset + 4);
       }
     }
   }
diff --git a/src/jni_internal_test.cc b/src/jni_internal_test.cc
index a67bec1..e1f62fc 100644
--- a/src/jni_internal_test.cc
+++ b/src/jni_internal_test.cc
@@ -52,6 +52,517 @@
     CommonTest::TearDown();
   }
 
+  Method::InvokeStub* DoCompile(Method*& method, Object*& receiver, bool is_static, const char* method_name, const char* method_signature) {  // Compiles the named leaf method, fills in |method| and |receiver|, and returns the method's invoke stub.
+    const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";  // Test dex/class is chosen by staticness.
+    SirtRef<ClassLoader> class_loader(LoadDex(class_name));
+    if (is_static) {
+      CompileDirectMethod(class_loader.get(), class_name, method_name, method_signature);  // Static methods are compiled as direct methods.
+    } else {
+      CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");  // NOTE(review): presumably needed by AllocObject() below - confirm.
+      CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
+      CompileVirtualMethod(class_loader.get(), class_name, method_name, method_signature);  // Instance methods are compiled as virtual methods.
+    }
+
+    Class* c = class_linker_->FindClass(DotToDescriptor(class_name).c_str(), class_loader.get());
+    CHECK(c != NULL);
+
+    method = is_static ? c->FindDirectMethod(method_name, method_signature) : c->FindVirtualMethod(method_name, method_signature);  // Out-param: the resolved method.
+    CHECK(method != NULL);
+
+    receiver = (is_static ? NULL : c->AllocObject());  // Out-param: NULL for static methods, a newly allocated instance otherwise.
+
+    Method::InvokeStub* stub = method->GetInvokeStub();
+    CHECK(stub != NULL);  // Abort rather than hand a null stub back to the caller.
+
+    return stub;
+  }
+
+  void InvokeNopMethod(bool is_static) {  // Invokes nop()V through its invoke stub with no arguments and no result slot.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "nop", "()V");
+    (*stub)(method, receiver, Thread::Current(), NULL, NULL);  // NULL args array and NULL result pointer.
+  }
+
+  void InvokeIdentityByteMethod(bool is_static) {  // Invokes identity(B)B through its invoke stub and checks the byte comes back unchanged.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "identity", "(B)B");
+
+    JValue args[1];
+    JValue result;
+
+    args[0].i = 0;  // The byte argument is written through the int member of JValue.
+    result.b = -1;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(0, result.b);
+
+    args[0].i = -1;
+    result.b = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-1, result.b);
+
+    args[0].i = SCHAR_MAX;  // Largest signed char value.
+    result.b = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(SCHAR_MAX, result.b);
+
+    args[0].i = SCHAR_MIN;  // Smallest signed char value.
+    result.b = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(SCHAR_MIN, result.b);
+  }
+
+  void InvokeIdentityIntMethod(bool is_static) {  // Invokes identity(I)I through its invoke stub and checks the int comes back unchanged.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "identity", "(I)I");
+
+    JValue args[1];
+    JValue result;
+
+    args[0].i = 0;
+    result.i = -1;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(0, result.i);
+
+    args[0].i = -1;
+    result.i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-1, result.i);
+
+    args[0].i = INT_MAX;  // Upper boundary of int.
+    result.i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(INT_MAX, result.i);
+
+    args[0].i = INT_MIN;  // Lower boundary of int.
+    result.i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(INT_MIN, result.i);
+  }
+
+  void InvokeIdentityDoubleMethod(bool is_static) {  // Invokes identity(D)D through its invoke stub and checks the double comes back unchanged.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "identity", "(D)D");
+
+    JValue args[1];
+    JValue result;
+
+    args[0].d = 0.0;
+    result.d = -1.0;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(0.0, result.d);
+
+    args[0].d = -1.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-1.0, result.d);
+
+    args[0].d = DBL_MAX;  // Largest finite double.
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(DBL_MAX, result.d);
+
+    args[0].d = DBL_MIN;  // Smallest positive normalized double.
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(DBL_MIN, result.d);
+  }
+
+  void InvokeSumIntIntMethod(bool is_static) {  // Invokes sum(II)I through its invoke stub and checks five argument pairs.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(II)I");
+
+    JValue result;
+    result.i = -1;  // Seeded with a value different from the expected one.
+    JValue args[2];
+    args[0].i = 0;
+    args[1].i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0, result.i);
+
+    result.i = 0;
+    args[0].i = 1;
+    args[1].i = 2;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(3, result.i);
+
+    result.i = 0;
+    args[0].i = -2;
+    args[1].i = 5;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(3, result.i);
+
+    result.i = 1234;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MIN;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-1, result.i);  // INT_MAX + INT_MIN is exactly -1.
+
+    result.i = INT_MIN;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MAX;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-2, result.i);  // Expected value is the 32-bit wrapped sum.
+  }
+
+  void InvokeSumIntIntIntMethod(bool is_static) {  // Invokes sum(III)I through its invoke stub and checks five argument triples.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(III)I");
+
+    JValue result;
+    result.i = -1;  // Seeded with a value different from the expected one.
+    JValue args[3];
+    args[0].i = 0;
+    args[1].i = 0;
+    args[2].i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0, result.i);
+
+    result.i = 0;
+    args[0].i = 1;
+    args[1].i = 2;
+    args[2].i = 3;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(6, result.i);
+
+    result.i = 0;
+    args[0].i = -1;
+    args[1].i = 2;
+    args[2].i = -3;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-2, result.i);
+
+    result.i = 1234;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MIN;
+    args[2].i = INT_MAX;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(2147483646, result.i);  // INT_MAX + INT_MIN + INT_MAX == INT_MAX - 1.
+
+    result.i = INT_MIN;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MAX;
+    args[2].i = INT_MAX;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(2147483645, result.i);  // Expected value is the 32-bit wrapped sum.
+  }
+
+  void InvokeSumIntIntIntIntMethod(bool is_static) {  // Invokes sum(IIII)I through its invoke stub and checks five argument sets.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(IIII)I");
+
+    JValue result;
+    result.i = -1;  // Seeded with a value different from the expected one.
+    JValue args[4];
+    args[0].i = 0;
+    args[1].i = 0;
+    args[2].i = 0;
+    args[3].i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0, result.i);
+
+    result.i = 0;
+    args[0].i = 1;
+    args[1].i = 2;
+    args[2].i = 3;
+    args[3].i = 4;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(10, result.i);
+
+    result.i = 0;
+    args[0].i = -1;
+    args[1].i = 2;
+    args[2].i = -3;
+    args[3].i = 4;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(2, result.i);
+
+    result.i = 1234;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MIN;
+    args[2].i = INT_MAX;
+    args[3].i = INT_MIN;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-2, result.i);  // Two (INT_MAX + INT_MIN) pairs, each equal to -1.
+
+    result.i = INT_MIN;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MAX;
+    args[2].i = INT_MAX;
+    args[3].i = INT_MAX;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-4, result.i);  // Expected value is the 32-bit wrapped sum.
+  }
+
+  void InvokeSumIntIntIntIntIntMethod(bool is_static) {  // Invokes sum(IIIII)I through its invoke stub and checks five argument sets.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(IIIII)I");
+
+    JValue result;
+    result.i = -1;  // Integer literal for the int member; seeded with a value different from the expected one.
+    JValue args[5];
+    args[0].i = 0;
+    args[1].i = 0;
+    args[2].i = 0;
+    args[3].i = 0;
+    args[4].i = 0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0, result.i);
+
+    result.i = 0;
+    args[0].i = 1;
+    args[1].i = 2;
+    args[2].i = 3;
+    args[3].i = 4;
+    args[4].i = 5;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(15, result.i);
+
+    result.i = 0;
+    args[0].i = -1;
+    args[1].i = 2;
+    args[2].i = -3;
+    args[3].i = 4;
+    args[4].i = -5;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-3, result.i);
+
+    result.i = 1234;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MIN;
+    args[2].i = INT_MAX;
+    args[3].i = INT_MIN;
+    args[4].i = INT_MAX;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(2147483645, result.i);  // -1 + -1 + INT_MAX == INT_MAX - 2.
+
+    result.i = INT_MIN;
+    args[0].i = INT_MAX;
+    args[1].i = INT_MAX;
+    args[2].i = INT_MAX;
+    args[3].i = INT_MAX;
+    args[4].i = INT_MAX;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(2147483643, result.i);  // Expected value is the 32-bit wrapped sum.
+  }
+
+  void InvokeSumDoubleDoubleMethod(bool is_static) {  // Invokes sum(DD)D through its invoke stub and checks five argument pairs.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DD)D");
+
+    JValue args[2];
+    JValue result;
+
+    args[0].d = 0.0;
+    args[1].d = 0.0;
+    result.d = -1.0;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = 2.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(3.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = -2.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-1.0, result.d);
+
+    args[0].d = DBL_MAX;
+    args[1].d = DBL_MIN;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(1.7976931348623157e308, result.d);  // DBL_MIN is too small to change DBL_MAX.
+
+    args[0].d = DBL_MAX;
+    args[1].d = DBL_MAX;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(INFINITY, result.d);  // DBL_MAX + DBL_MAX overflows to infinity.
+  }
+
+  void InvokeSumDoubleDoubleDoubleMethod(bool is_static) {  // Invokes sum(DDD)D through its invoke stub and checks three argument triples.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DDD)D");
+
+    JValue args[3];
+    JValue result;
+
+    args[0].d = 0.0;
+    args[1].d = 0.0;
+    args[2].d = 0.0;
+    result.d = -1.0;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = 2.0;
+    args[2].d = 3.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(6.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = -2.0;
+    args[2].d = 3.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(2.0, result.d);
+  }
+
+  void InvokeSumDoubleDoubleDoubleDoubleMethod(bool is_static) {  // Invokes sum(DDDD)D through its invoke stub and checks three argument sets.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DDDD)D");
+
+    JValue args[4];
+    JValue result;
+
+    args[0].d = 0.0;
+    args[1].d = 0.0;
+    args[2].d = 0.0;
+    args[3].d = 0.0;
+    result.d = -1.0;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = 2.0;
+    args[2].d = 3.0;
+    args[3].d = 4.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(10.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = -2.0;
+    args[2].d = 3.0;
+    args[3].d = -4.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(-2.0, result.d);
+  }
+
+  void InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(bool is_static) {  // Invokes sum(DDDDD)D through its invoke stub and checks three argument sets.
+    // TODO: remove this when we have a working x86 compiler.
+#if !defined(__arm__)
+    if (!is_static) {
+      return;  // Off ARM, only the static variant is exercised.
+    }
+#endif
+    Method* method;  // Filled in by DoCompile.
+    Object* receiver;  // Filled in by DoCompile; NULL when is_static.
+    Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DDDDD)D");
+
+    JValue args[5];
+    JValue result;
+
+    args[0].d = 0.0;
+    args[1].d = 0.0;
+    args[2].d = 0.0;
+    args[3].d = 0.0;
+    args[4].d = 0.0;
+    result.d = -1.0;  // Seeded with a value different from the expected one.
+    (*stub)(method, receiver, Thread::Current(), args, &result);  // Pass the real receiver so the non-static variant gets a non-null 'this'.
+    EXPECT_EQ(0.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = 2.0;
+    args[2].d = 3.0;
+    args[3].d = 4.0;
+    args[4].d = 5.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(15.0, result.d);
+
+    args[0].d = 1.0;
+    args[1].d = -2.0;
+    args[2].d = 3.0;
+    args[3].d = -4.0;
+    args[4].d = 5.0;
+    result.d = 0.0;
+    (*stub)(method, receiver, Thread::Current(), args, &result);
+    EXPECT_EQ(3.0, result.d);
+  }
+
   JavaVMExt* vm_;
   JNIEnvExt* env_;
   jclass aioobe_;
@@ -903,695 +1414,106 @@
 
   Method::InvokeStub* stub = method->GetInvokeStub();
 
-  Object* arg = NULL;
+  JValue args[1];
+  args[0].l = NULL;
 
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), NULL);
+  (*stub)(method, NULL, Thread::Current(), args, NULL);
 }
 
 TEST_F(JniInternalTest, StaticNopMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "nop", "()V");
+  InvokeNopMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("nop", "()V");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  (*stub)(method, NULL, Thread::Current(), NULL, NULL);
+TEST_F(JniInternalTest, NonStaticNopMethod) {
+  InvokeNopMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticIdentityByteMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "identity", "(B)B");
+  InvokeIdentityByteMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("identity", "(B)B");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  int arg;
-  JValue result;
-
-  arg = 0;
-  result.b = -1;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(0, result.b);
-
-  arg = -1;
-  result.b = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(-1, result.b);
-
-  arg = SCHAR_MAX;
-  result.b = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(SCHAR_MAX, result.b);
-
-  arg = SCHAR_MIN;
-  result.b = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(SCHAR_MIN, result.b);
+TEST_F(JniInternalTest, NonStaticIdentityByteMethod) {
+  InvokeIdentityByteMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticIdentityIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "identity", "(I)I");
+  InvokeIdentityIntMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("identity", "(I)I");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  int arg;
-  JValue result;
-
-  arg = 0;
-  result.i = -1;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(0, result.i);
-
-  arg = -1;
-  result.i = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(-1, result.i);
-
-  arg = INT_MAX;
-  result.i = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(INT_MAX, result.i);
-
-  arg = INT_MIN;
-  result.i = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(INT_MIN, result.i);
+TEST_F(JniInternalTest, NonStaticIdentityIntMethod) {
+  InvokeIdentityIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticIdentityDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "identity", "(D)D");
-
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("identity", "(D)D");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  double arg;
-  JValue result;
-
-  arg = 0.0;
-  result.d = -1.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(0.0, result.d);
-
-  arg = -1.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(-1.0, result.d);
-
-  arg = DBL_MAX;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(DBL_MAX, result.d);
-
-  arg = DBL_MIN;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(&arg), &result);
-  EXPECT_EQ(DBL_MIN, result.d);
+  InvokeIdentityDoubleMethod(true);
 }
 
-#if defined(ART_USE_LLVM_COMPILER)
-static byte* CreateArgArray(Method* method, JValue* args) {
-  const char* shorty = MethodHelper(method).GetShorty();
-  size_t shorty_len = strlen(shorty);
-  UniquePtr<byte[]> arg_array(new byte[shorty_len * 8]);
-  for (size_t i = 1, offset = 0; i < shorty_len; ++i) {
-    switch (shorty[i]) {
-    case 'Z':
-    case 'B':
-    case 'C':
-    case 'S':
-    case 'I':
-      *reinterpret_cast<uint32_t*>(&arg_array[offset]) = args[i - 1].i;
-      break;
-    case 'F':
-      *reinterpret_cast<float*>(&arg_array[offset]) = args[i - 1].f;
-      break;
-    case 'L':
-      *reinterpret_cast<Object**>(&arg_array[offset]) = args[i - 1].l;
-      break;
-    case 'D':
-      *reinterpret_cast<double*>(&arg_array[offset]) = args[i - 1].d;
-      break;
-    case 'J':
-      *reinterpret_cast<uint64_t*>(&arg_array[offset]) = args[i - 1].j;
-      break;
-    }
-    offset += 8;
-  }
-  return arg_array.release();
+TEST_F(JniInternalTest, NonStaticIdentityDoubleMethod) {
+  InvokeIdentityDoubleMethod(false);
 }
-#endif
 
 TEST_F(JniInternalTest, StaticSumIntIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(II)I");
+  InvokeSumIntIntMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(II)I");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  JValue result;
-  result.i = -1;
-#if !defined(ART_USE_LLVM_COMPILER)
-  int args[2];
-  args[0] = 0;
-  args[1] = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  JValue args[2];
-  args[0].i = 0;
-  args[1].i = 0;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(0, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = 1;
-  args[1] = 2;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = 1;
-  args[1].i = 2;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(3, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = -2;
-  args[1] = 5;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = -2;
-  args[1].i = 5;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(3, result.i);
-
-  result.i = 1234;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MIN;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MIN;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(-1, result.i);
-
-  result.i = INT_MIN;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(-2, result.i);
+TEST_F(JniInternalTest, NonStaticSumIntIntMethod) {
+  InvokeSumIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(III)I");
+  InvokeSumIntIntIntMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(III)I");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  JValue result;
-  result.i = -1;
-#if !defined(ART_USE_LLVM_COMPILER)
-  int args[3];
-  args[0] = 0;
-  args[1] = 0;
-  args[2] = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  JValue args[3];
-  args[0].i = 0;
-  args[1].i = 0;
-  args[2].i = 0;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(0, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = 1;
-  args[1] = 2;
-  args[2] = 3;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = 1;
-  args[1].i = 2;
-  args[2].i = 3;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(6, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = -1;
-  args[1] = 2;
-  args[2] = -3;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = -1;
-  args[1].i = 2;
-  args[2].i = -3;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(-2, result.i);
-
-  result.i = 1234;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MIN;
-  args[2] = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MIN;
-  args[2].i = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(2147483646, result.i);
-
-  result.i = INT_MIN;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MAX;
-  args[2] = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MAX;
-  args[2].i = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(2147483645, result.i);
+TEST_F(JniInternalTest, NonStaticSumIntIntIntMethod) {
+  InvokeSumIntIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntIntIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(IIII)I");
+  InvokeSumIntIntIntIntMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(IIII)I");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  JValue result;
-  result.i = -1;
-#if !defined(ART_USE_LLVM_COMPILER)
-  int args[4];
-  args[0] = 0;
-  args[1] = 0;
-  args[2] = 0;
-  args[3] = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  JValue args[4];
-  args[0].i = 0;
-  args[1].i = 0;
-  args[2].i = 0;
-  args[3].i = 0;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(0, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = 1;
-  args[1] = 2;
-  args[2] = 3;
-  args[3] = 4;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = 1;
-  args[1].i = 2;
-  args[2].i = 3;
-  args[3].i = 4;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(10, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = -1;
-  args[1] = 2;
-  args[2] = -3;
-  args[3] = 4;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = -1;
-  args[1].i = 2;
-  args[2].i = -3;
-  args[3].i = 4;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(2, result.i);
-
-  result.i = 1234;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MIN;
-  args[2] = INT_MAX;
-  args[3] = INT_MIN;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MIN;
-  args[2].i = INT_MAX;
-  args[3].i = INT_MIN;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(-2, result.i);
-
-  result.i = INT_MIN;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MAX;
-  args[2] = INT_MAX;
-  args[3] = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MAX;
-  args[2].i = INT_MAX;
-  args[3].i = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(-4, result.i);
+TEST_F(JniInternalTest, NonStaticSumIntIntIntIntMethod) {
+  InvokeSumIntIntIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntIntIntIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(IIIII)I");
+  InvokeSumIntIntIntIntIntMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(IIIII)I");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  JValue result;
-  result.i = -1.0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  int args[5];
-  args[0] = 0;
-  args[1] = 0;
-  args[2] = 0;
-  args[3] = 0;
-  args[4] = 0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  JValue args[5];
-  args[0].i = 0;
-  args[1].i = 0;
-  args[2].i = 0;
-  args[3].i = 0;
-  args[4].i = 0;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(0, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = 1;
-  args[1] = 2;
-  args[2] = 3;
-  args[3] = 4;
-  args[4] = 5;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = 1;
-  args[1].i = 2;
-  args[2].i = 3;
-  args[3].i = 4;
-  args[4].i = 5;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(15, result.i);
-
-  result.i = 0;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = -1;
-  args[1] = 2;
-  args[2] = -3;
-  args[3] = 4;
-  args[4] = -5;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = -1;
-  args[1].i = 2;
-  args[2].i = -3;
-  args[3].i = 4;
-  args[4].i = -5;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(-3, result.i);
-
-  result.i = 1234;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MIN;
-  args[2] = INT_MAX;
-  args[3] = INT_MIN;
-  args[4] = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MIN;
-  args[2].i = INT_MAX;
-  args[3].i = INT_MIN;
-  args[4].i = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(2147483645, result.i);
-
-  result.i = INT_MIN;
-#if !defined(ART_USE_LLVM_COMPILER)
-  args[0] = INT_MAX;
-  args[1] = INT_MAX;
-  args[2] = INT_MAX;
-  args[3] = INT_MAX;
-  args[4] = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-#else
-  args[0].i = INT_MAX;
-  args[1].i = INT_MAX;
-  args[2].i = INT_MAX;
-  args[3].i = INT_MAX;
-  args[4].i = INT_MAX;
-  (*stub)(method, NULL, Thread::Current(), CreateArgArray(method, args), &result);
-#endif
-  EXPECT_EQ(2147483643, result.i);
+TEST_F(JniInternalTest, NonStaticSumIntIntIntIntIntMethod) {
+  InvokeSumIntIntIntIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(DD)D");
+  InvokeSumDoubleDoubleMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(DD)D");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  double args[2];
-  JValue result;
-
-  args[0] = 0.0;
-  args[1] = 0.0;
-  result.d = -1.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(0.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = 2.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(3.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = -2.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(-1.0, result.d);
-
-  args[0] = DBL_MAX;
-  args[1] = DBL_MIN;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(1.7976931348623157e308, result.d);
-
-  args[0] = DBL_MAX;
-  args[1] = DBL_MAX;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(INFINITY, result.d);
+TEST_F(JniInternalTest, NonStaticSumDoubleDoubleMethod) {
+  InvokeSumDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(DDD)D");
+  InvokeSumDoubleDoubleDoubleMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(DDD)D");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  double args[3];
-  JValue result;
-
-  args[0] = 0.0;
-  args[1] = 0.0;
-  args[2] = 0.0;
-  result.d = -1.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(0.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = 2.0;
-  args[2] = 3.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(6.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = -2.0;
-  args[2] = 3.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(2.0, result.d);
+TEST_F(JniInternalTest, NonStaticSumDoubleDoubleDoubleMethod) {
+  InvokeSumDoubleDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleDoubleDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(DDDD)D");
+  InvokeSumDoubleDoubleDoubleDoubleMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(DDDD)D");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  double args[4];
-  JValue result;
-
-  args[0] = 0.0;
-  args[1] = 0.0;
-  args[2] = 0.0;
-  args[3] = 0.0;
-  result.d = -1.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(0.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = 2.0;
-  args[2] = 3.0;
-  args[3] = 4.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(10.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = -2.0;
-  args[2] = 3.0;
-  args[3] = -4.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(-2.0, result.d);
+TEST_F(JniInternalTest, NonStaticSumDoubleDoubleDoubleDoubleMethod) {
+  InvokeSumDoubleDoubleDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleDoubleDoubleDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticLeafMethods"));
-  CompileDirectMethod(class_loader.get(), "StaticLeafMethods", "sum", "(DDDDD)D");
+  InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(true);
+}
 
-  Class* klass = class_linker_->FindClass("LStaticLeafMethods;", class_loader.get());
-  ASSERT_TRUE(klass != NULL);
-
-  Method* method = klass->FindDirectMethod("sum", "(DDDDD)D");
-  ASSERT_TRUE(method != NULL);
-
-  Method::InvokeStub* stub = method->GetInvokeStub();
-
-  double args[5];
-  JValue result;
-
-  args[0] = 0.0;
-  args[1] = 0.0;
-  args[2] = 0.0;
-  args[3] = 0.0;
-  args[4] = 0.0;
-  result.d = -1.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(0.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = 2.0;
-  args[2] = 3.0;
-  args[3] = 4.0;
-  args[4] = 5.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(15.0, result.d);
-
-  args[0] = 1.0;
-  args[1] = -2.0;
-  args[2] = 3.0;
-  args[3] = -4.0;
-  args[4] = 5.0;
-  result.d = 0.0;
-  (*stub)(method, NULL, Thread::Current(), reinterpret_cast<byte*>(args), &result);
-  EXPECT_EQ(3.0, result.d);
+TEST_F(JniInternalTest, NonStaticSumDoubleDoubleDoubleDoubleDoubleMethod) {
+  InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, Throw) {
diff --git a/src/jni_internal_x86.cc b/src/jni_internal_x86.cc
index e23c87b..361cd70 100644
--- a/src/jni_internal_x86.cc
+++ b/src/jni_internal_x86.cc
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include "jni_internal.h"
-
 #include "assembler.h"
 #include "compiled_method.h"
 #include "compiler.h"
@@ -63,16 +61,32 @@
   if (pad_size != 0) {
     __ subl(ESP, Immediate(pad_size));
   }
-  // Push/copy arguments
-  for (size_t off = num_arg_array_bytes; off > 0; off -= kPointerSize) {
-    if (off > ((is_static ? 2 : 1) * kPointerSize)) {
-      // Copy argument
-      __ pushl(Address(rArgArray, off - kPointerSize));
-    } else {
-      // Space for argument passed in register
-      __ pushl(Immediate(0));
+
+  // Push/copy arguments, last to first. Offsets start one past the end, so decrement, then push.
+  size_t arg_count = (shorty_len - 1);
+  size_t dst_offset = num_arg_array_bytes;
+  size_t src_offset = arg_count * sizeof(JValue);
+  for (size_t i = shorty_len - 1; i > 0; --i) {
+    switch (shorty[i]) {
+      case 'D':
+      case 'J':
+        // Move both pointers 64 bits, pushing the higher-addressed 32-bit half first.
+        dst_offset -= kPointerSize;
+        src_offset -= sizeof(JValue) / 2;
+        __ pushl(Address(rArgArray, src_offset));
+        dst_offset -= kPointerSize;
+        src_offset -= sizeof(JValue) / 2;
+        __ pushl(Address(rArgArray, src_offset));
+        break;
+      default:
+        // Move the source pointer sizeof(JValue) and the destination pointer 32 bits.
+        dst_offset -= kPointerSize;
+        src_offset -= sizeof(JValue);
+        __ pushl(Address(rArgArray, src_offset));
+        break;
     }
   }
+
   // Backing space for receiver
   if (!is_static) {
     __ pushl(Immediate(0));
@@ -81,16 +95,17 @@
   __ pushl(Immediate(0));
   if (!is_static) {
     if (num_arg_array_bytes >= static_cast<size_t>(kPointerSize)) {
-      // Receiver already in EDX, pass 1st arg in ECX
+      // Receiver already in EDX, pass 1st arg in ECX.
       __ movl(ECX, Address(rArgArray, 0));
     }
   } else {
     if (num_arg_array_bytes >= static_cast<size_t>(kPointerSize)) {
-      // Pass 1st arg in EDX
+      // Pass 1st arg in EDX.
       __ movl(EDX, Address(rArgArray, 0));
       if (num_arg_array_bytes >= static_cast<size_t>(2* kPointerSize)) {
-        // Pass 2nd arg in ECX
-        __ movl(ECX, Address(rArgArray, kPointerSize));
+        // Pass 2nd arg (or second 32-bit chunk of a wide 1st arg) in ECX.
+        bool is_wide = (shorty[1] == 'D' || shorty[1] == 'J');
+        __ movl(ECX, Address(rArgArray, is_wide ? kPointerSize : 2 * kPointerSize));
       }
     }
   }
diff --git a/src/oatdump.cc b/src/oatdump.cc
index 96af3b0..9641390 100644
--- a/src/oatdump.cc
+++ b/src/oatdump.cc
@@ -281,8 +281,6 @@
     std::string signature(dex_file.GetMethodSignature(method_id));
     os << StringPrintf("\t%d: %s %s (dex_method_idx=%d)\n",
                        class_method_index, name, signature.c_str(), dex_method_idx);
-    os << StringPrintf("\t\tcode: %p (offset=0x%08x)\n",
-                       oat_method.GetCode(), oat_method.GetCodeOffset());
     os << StringPrintf("\t\tframe_size_in_bytes: %zd\n",
                        oat_method.GetFrameSizeInBytes());
     os << StringPrintf("\t\tcore_spill_mask: 0x%08x",
@@ -301,11 +299,17 @@
     os << StringPrintf("\t\tgc_map: %p (offset=0x%08x)\n",
                        oat_method.GetGcMap(), oat_method.GetGcMapOffset());
     DumpGcMap(os, oat_method.GetGcMap());
-    os << StringPrintf("\t\tinvoke_stub: %p (offset=0x%08x)\n",
-                       oat_method.GetInvokeStub(), oat_method.GetInvokeStubOffset());
-    os << "\t\tCODE: (size=" << oat_method.GetCodeSize() << ")\n";
+    os << StringPrintf("\t\tCODE: %p (offset=0x%08x size=%d)%s\n",
+                       oat_method.GetCode(),
+                       oat_method.GetCodeOffset(),
+                       oat_method.GetCodeSize(),
+                       oat_method.GetCode() != NULL ? "..." : "");
     DumpCode(os, oat_method.GetCode(), oat_method.GetMappingTable(), dex_file, code_item);
-    os << "\t\tINVOKE STUB: (size=" << oat_method.GetInvokeStubSize() << ")\n";
+    os << StringPrintf("\t\tINVOKE STUB: %p (offset=0x%08x size=%d)%s\n",
+                       oat_method.GetInvokeStub(),
+                       oat_method.GetInvokeStubOffset(),
+                       oat_method.GetInvokeStubSize(),
+                       oat_method.GetInvokeStub() != NULL ? "..." : "");
     DumpCode(os, reinterpret_cast<const void*>(oat_method.GetInvokeStub()), NULL, dex_file, NULL);
   }
 
diff --git a/src/object.cc b/src/object.cc
index 2e98f4f..59fa28b 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -556,7 +556,7 @@
   return DexFile::kDexNoIndex;
 }
 
-void Method::Invoke(Thread* self, Object* receiver, byte* args, JValue* result) const {
+void Method::Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) const {
   // Push a transition back into managed code onto the linked list in thread.
   CHECK_EQ(Thread::kRunnable, self->GetState());
   NativeToManagedRecord record;
diff --git a/src/object.h b/src/object.h
index 3a7efa2..ae435c6 100644
--- a/src/object.h
+++ b/src/object.h
@@ -490,7 +490,7 @@
   typedef void InvokeStub(const Method* method,
                           Object* obj,
                           Thread* thread,
-                          byte* args,
+                          JValue* args,
                           JValue* result);
 
   Class* GetDeclaringClass() const;
@@ -623,7 +623,7 @@
   // Find the method that this method overrides
   Method* FindOverriddenMethod() const;
 
-  void Invoke(Thread* self, Object* receiver, byte* args, JValue* result) const;
+  void Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) const;
 
   const void* GetCode() const {
     return GetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(Method, code_), false);
diff --git a/src/reflection.cc b/src/reflection.cc
index c317be0..6aa239e 100644
--- a/src/reflection.cc
+++ b/src/reflection.cc
@@ -235,40 +235,30 @@
   }
 
   Method* m = NULL;
-  UniquePtr<byte[]> args(new byte[8]);
-  memset(&args[0], 0, 8);
   switch (src_class) {
   case Primitive::kPrimBoolean:
     m = gBoolean_valueOf;
-    *reinterpret_cast<uint32_t*>(&args[0]) = value.z;
     break;
   case Primitive::kPrimByte:
     m = gByte_valueOf;
-    *reinterpret_cast<uint32_t*>(&args[0]) = value.b;
     break;
   case Primitive::kPrimChar:
     m = gCharacter_valueOf;
-    *reinterpret_cast<uint32_t*>(&args[0]) = value.c;
     break;
   case Primitive::kPrimDouble:
     m = gDouble_valueOf;
-    *reinterpret_cast<double*>(&args[0]) = value.d;
     break;
   case Primitive::kPrimFloat:
     m = gFloat_valueOf;
-    *reinterpret_cast<float*>(&args[0]) = value.f;
     break;
   case Primitive::kPrimInt:
     m = gInteger_valueOf;
-    *reinterpret_cast<uint32_t*>(&args[0]) = value.i;
     break;
   case Primitive::kPrimLong:
     m = gLong_valueOf;
-    *reinterpret_cast<uint64_t*>(&args[0]) = value.j;
     break;
   case Primitive::kPrimShort:
     m = gShort_valueOf;
-    *reinterpret_cast<uint32_t*>(&args[0]) = value.s;
     break;
   case Primitive::kPrimVoid:
     // There's no such thing as a void field, and void methods invoked via reflection return null.
@@ -280,7 +270,9 @@
 
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, Thread::kRunnable);
-  m->Invoke(self, NULL, args.get(), &value);
+  JValue args[1];
+  args[0] = value;  // Forward the primitive being boxed as valueOf's sole argument.
+  m->Invoke(self, NULL, args, &value);
 }
 
 bool UnboxPrimitive(JNIEnv* env, Object* o, Class* dst_class, JValue& unboxed_value, const char* what) {
diff --git a/src/thread.cc b/src/thread.cc
index 629cb0f..58ef1fe 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -949,10 +949,10 @@
 
   // Call the handler.
   Method* m = handler->GetClass()->FindVirtualMethodForVirtualOrInterface(gUncaughtExceptionHandler_uncaughtException);
-  Object* args[2];
-  args[0] = peer_;
-  args[1] = exception;
-  m->Invoke(this, handler, reinterpret_cast<byte*>(&args), NULL);
+  JValue args[2];
+  args[0].l = peer_;
+  args[1].l = exception;
+  m->Invoke(this, handler, args, NULL);
 
   // If the handler threw, clear that exception too.
   ClearException();
@@ -968,8 +968,9 @@
   Object* group = GetThreadGroup();
   if (group != NULL) {
     Method* m = group->GetClass()->FindVirtualMethodForVirtualOrInterface(gThreadGroup_removeThread);
-    Object* args = peer_;
-    m->Invoke(this, group, reinterpret_cast<byte*>(&args), NULL);
+    JValue args[1];
+    args[0].l = peer_;
+    m->Invoke(this, group, args, NULL);
   }
 }
 
diff --git a/test/NonStaticLeafMethods/NonStaticLeafMethods.java b/test/NonStaticLeafMethods/NonStaticLeafMethods.java
new file mode 100644
index 0000000..28e03c6
--- /dev/null
+++ b/test/NonStaticLeafMethods/NonStaticLeafMethods.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class NonStaticLeafMethods {  // Instance methods that do nothing, echo, or add their arguments.
+    NonStaticLeafMethods() {
+    }
+    void nop() {  // Takes no arguments and returns nothing.
+    }
+    byte identity(byte x) {  // identity is overloaded for byte, int and double.
+        return x;
+    }
+    int identity(int x) {
+        return x;
+    }
+    int sum(int a, int b) {  // sum is overloaded for two to five ints.
+        return a + b;
+    }
+    int sum(int a, int b, int c) {
+        return a + b + c;
+    }
+    int sum(int a, int b, int c, int d) {
+        return a + b + c + d;
+    }
+    int sum(int a, int b, int c, int d, int e) {
+        return a + b + c + d + e;
+    }
+    double identity(double x) {
+        return x;
+    }
+    double sum(double a, double b) {  // sum is overloaded for two to five doubles.
+        return a + b;
+    }
+    double sum(double a, double b, double c) {
+        return a + b + c;
+    }
+    double sum(double a, double b, double c, double d) {
+        return a + b + c + d;
+    }
+    double sum(double a, double b, double c, double d, double e) {
+        return a + b + c + d + e;
+    }
+}
diff --git a/test/ReflectionTest/ReflectionTest.java b/test/ReflectionTest/ReflectionTest.java
index 94c02aa..a9dc6e7 100644
--- a/test/ReflectionTest/ReflectionTest.java
+++ b/test/ReflectionTest/ReflectionTest.java
@@ -119,8 +119,8 @@
     try {
       f = ReflectionTest.class.getDeclaredField("s");
       f.set(null, Integer.valueOf(14));
-    } catch (Exception ex) {
-      ex.printStackTrace();
+    } catch (IllegalArgumentException expected) {
+      expected.printStackTrace();
     }
 
     f = ReflectionTest.class.getDeclaredField("z");
@@ -209,8 +209,8 @@
       System.out.println(Arrays.toString(m.getParameterTypes()));
       show(m.invoke(null));
       System.out.println("************* should have thrown!");
-    } catch (Exception ex) {
-      ex.printStackTrace();
+    } catch (Exception expected) {
+      expected.printStackTrace();
     }
   }
 
