Interpreter tweaks

Increase the amount of inlining and add hot/cold function and branch hints
in the interpreter to make the hot Execute function faster.
Interpreter performance is 3x slower than Dalvik+JIT on FibonacciFast and
similar microbenchmarks.

Change-Id: I2b1a0c7545f86036b9b1b5ccac881d06292356d8
diff --git a/src/interpreter/interpreter.cc b/src/interpreter/interpreter.cc
index 33bdf9f..e315710 100644
--- a/src/interpreter/interpreter.cc
+++ b/src/interpreter/interpreter.cc
@@ -385,8 +385,7 @@
 
 static void DoInvoke(Thread* self, MethodHelper& mh, ShadowFrame& shadow_frame,
                      const Instruction* inst, InvokeType type, bool is_range,
-                     JValue* result)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                     JValue* result) {
   uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
   Object* receiver;
   if (type == kStatic) {
@@ -474,7 +473,11 @@
 static void DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
                        const Instruction* inst, FindFieldType find_type,
                        Primitive::Type field_type)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE;
+
+static inline void DoFieldGet(Thread* self, ShadowFrame& shadow_frame,
+                              const Instruction* inst, FindFieldType find_type,
+                              Primitive::Type field_type) {
   bool is_static = (find_type == StaticObjectRead) || (find_type == StaticPrimitiveRead);
   uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
   Field* f = FindFieldFromCode(field_idx, shadow_frame.GetMethod(), self,
@@ -524,7 +527,11 @@
 static void DoFieldPut(Thread* self, ShadowFrame& shadow_frame,
                        const Instruction* inst, FindFieldType find_type,
                        Primitive::Type field_type)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE;
+
+static inline void DoFieldPut(Thread* self, ShadowFrame& shadow_frame,
+                              const Instruction* inst, FindFieldType find_type,
+                              Primitive::Type field_type) {
   bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
   uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
   Field* f = FindFieldFromCode(field_idx, shadow_frame.GetMethod(), self,
@@ -572,7 +579,7 @@
   }
 }
 
-static String* ResolveString(Thread* self, MethodHelper& mh, uint32_t string_idx) {
+static inline String* ResolveString(Thread* self, MethodHelper& mh, uint32_t string_idx) {
   Class* java_lang_string_class = String::GetJavaLangString();
   if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -585,7 +592,7 @@
   return mh.ResolveString(string_idx);
 }
 
-static void DoIntDivide(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
+static inline void DoIntDivide(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
     int32_t dividend, int32_t divisor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(divisor == 0)) {
     ThrowArithmeticExceptionDivideByZero(self);
@@ -596,7 +603,7 @@
   }
 }
 
-static void DoIntRemainder(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
+static inline void DoIntRemainder(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
     int32_t dividend, int32_t divisor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(divisor == 0)) {
     ThrowArithmeticExceptionDivideByZero(self);
@@ -607,7 +614,7 @@
   }
 }
 
-static void DoLongDivide(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
+static inline void DoLongDivide(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
     int64_t dividend, int64_t divisor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(divisor == 0)) {
     ThrowArithmeticExceptionDivideByZero(self);
@@ -618,7 +625,7 @@
   }
 }
 
-static void DoLongRemainder(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
+static inline void DoLongRemainder(Thread* self, ShadowFrame& shadow_frame, size_t result_reg,
     int64_t dividend, int64_t divisor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(divisor == 0)) {
     ThrowArithmeticExceptionDivideByZero(self);
@@ -629,12 +636,20 @@
   }
 }
 
-static const Instruction* FindNextInstructionFollowingException(Thread* self,
-                                                                ShadowFrame& shadow_frame,
-                                                                uint32_t dex_pc,
-                                                                const uint16_t* const insns,
-                                                                SirtRef<Object>& this_object_ref,
-                                                                instrumentation::Instrumentation* instrumentation) {
+static inline const Instruction* FindNextInstructionFollowingException(Thread* self,
+                                                                       ShadowFrame& shadow_frame,
+                                                                       uint32_t dex_pc,
+                                                                       const uint16_t* insns,
+                                                                       SirtRef<Object>& this_object_ref,
+                                                                       instrumentation::Instrumentation* instrumentation)
+    ALWAYS_INLINE;
+
+static inline const Instruction* FindNextInstructionFollowingException(Thread* self,
+                                                                       ShadowFrame& shadow_frame,
+                                                                       uint32_t dex_pc,
+                                                                       const uint16_t* insns,
+                                                                       SirtRef<Object>& this_object_ref,
+                                                                       instrumentation::Instrumentation* instrumentation) {
   self->VerifyStack();
   ThrowLocation throw_location;
   mirror::Throwable* exception = self->GetException(&throw_location);
@@ -670,9 +685,20 @@
     inst = inst-> next_function (); \
   }
 
+static void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
+  __attribute__ ((cold, noreturn, noinline));  // Fatal path: kept out of line, never returns.
+
+static void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh) {
+  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(&mh.GetDexFile());
+  exit(0);  // Unreachable (presumes LOG(FATAL) aborts); keeps GCC happy about noreturn.
+}
+
 static JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
                       ShadowFrame& shadow_frame, JValue result_register)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) __attribute__ ((hot));
+
+static JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
+                      ShadowFrame& shadow_frame, JValue result_register) {
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
     LOG(FATAL) << "Invalid shadow frame for interpreter use";
     return JValue();
@@ -692,7 +718,7 @@
                                       shadow_frame.GetMethod(), 0);
   }
   while (true) {
-    if (self->TestAllFlags()) {
+    if (UNLIKELY(self->TestAllFlags())) {
       CheckSuspend(self);
     }
     const uint32_t dex_pc = inst->GetDexPc(insns);
@@ -1180,12 +1206,10 @@
         float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
         float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
         int32_t result;
-        // TODO: we should not test float equality like this. Reorder comparisons
-        // or use a different comparison mechanism.
-        if (val1 == val2) {
-          result = 0;
-        } else if (val1 > val2) {
+        if (val1 > val2) {
           result = 1;
+        } else if (val1 == val2) {
+          result = 0;
         } else {
           result = -1;
         }
@@ -1197,12 +1221,10 @@
         float val1 = shadow_frame.GetVRegFloat(inst->VRegB_23x());
         float val2 = shadow_frame.GetVRegFloat(inst->VRegC_23x());
         int32_t result;
-        // TODO: we should not test float equality like this. Reorder comparisons
-        // or use a different comparison mechanism.
-        if (val1 == val2) {
-          result = 0;
-        } else if (val1 < val2) {
+        if (val1 < val2) {
           result = -1;
+        } else if (val1 == val2) {
+          result = 0;
         } else {
           result = 1;
         }
@@ -1214,12 +1236,10 @@
         double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
         double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
         int32_t result;
-        // TODO: we should not test double equality like this. Reorder comparisons
-        // or use a different comparison mechanism.
-        if (val1 == val2) {
-          result = 0;
-        } else if (val1 > val2) {
+        if (val1 > val2) {
           result = 1;
+        } else if (val1 == val2) {
+          result = 0;
         } else {
           result = -1;
         }
@@ -1232,12 +1252,10 @@
         double val1 = shadow_frame.GetVRegDouble(inst->VRegB_23x());
         double val2 = shadow_frame.GetVRegDouble(inst->VRegC_23x());
         int32_t result;
-        // TODO: we should not test double equality like this. Reorder comparisons
-        // or use a different comparison mechanism.
-        if (val1 == val2) {
-          result = 0;
-        } else if (val1 < val2) {
+        if (val1 < val2) {
           result = -1;
+        } else if (val1 == val2) {
+          result = 0;
         } else {
           result = 1;
         }
@@ -2433,9 +2451,12 @@
                              (inst->VRegC_22b() & 0x1f));
         inst = inst->Next_2xx();
         break;
-      default:
-        LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(&mh.GetDexFile());
-        break;
+      case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
+      case Instruction::UNUSED_E3 ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_73:
+      case Instruction::UNUSED_79:
+      case Instruction::UNUSED_7A:
+	UnexpectedOpcode(inst, mh);
     }
   }
 }
diff --git a/src/mirror/class-inl.h b/src/mirror/class-inl.h
index 62740be..6819fb2 100644
--- a/src/mirror/class-inl.h
+++ b/src/mirror/class-inl.h
@@ -189,7 +189,7 @@
     // src's super should be java_lang_Object, since it is an array.
     Class* java_lang_Object = src->GetSuperClass();
     DCHECK(java_lang_Object != NULL) << PrettyClass(src);
-     DCHECK(java_lang_Object->GetSuperClass() == NULL) << PrettyClass(src);
+    DCHECK(java_lang_Object->GetSuperClass() == NULL) << PrettyClass(src);
     return this == java_lang_Object;
   }
   return IsArrayAssignableFromArray(src);
diff --git a/src/mirror/class.h b/src/mirror/class.h
index dfbe815..0661b42 100644
--- a/src/mirror/class.h
+++ b/src/mirror/class.h
@@ -429,8 +429,7 @@
   // downcast would be necessary. Similarly for interfaces, a class that implements (or an interface
   // that extends) another can be assigned to its parent, but not vice-versa. All Classes may assign
   // to themselves. Classes for primitive types may not assign to each other.
-  bool IsAssignableFrom(const Class* src) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  inline bool IsAssignableFrom(const Class* src) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(src != NULL);
     if (this == src) {
       // Can always assign to things of the same type.
diff --git a/src/oat/runtime/support_dexcache.cc b/src/oat/runtime/support_dexcache.cc
index 6811d20..3e8ebc6 100644
--- a/src/oat/runtime/support_dexcache.cc
+++ b/src/oat/runtime/support_dexcache.cc
@@ -15,7 +15,9 @@
  */
 
 #include "callee_save_frame.h"
+#include "gc/card_table-inl.h"
 #include "class_linker-inl.h"
+#include "dex_file-inl.h"
 #include "mirror/abstract_method-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
diff --git a/src/runtime_support.cc b/src/runtime_support.cc
index b096431..9242c87 100644
--- a/src/runtime_support.cc
+++ b/src/runtime_support.cc
@@ -333,39 +333,6 @@
   }
 }
 
-mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx, const mirror::AbstractMethod* referrer,
-                                      Thread* self, bool can_run_clinit, bool verify_access) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
-  if (UNLIKELY(klass == NULL)) {
-    CHECK(self->IsExceptionPending());
-    return NULL;  // Failure - Indicate to caller to deliver exception
-  }
-  // Perform access check if necessary.
-  mirror::Class* referring_class = referrer->GetDeclaringClass();
-  if (verify_access && UNLIKELY(!referring_class->CanAccess(klass))) {
-    ThrowIllegalAccessErrorClass(referring_class, klass);
-    return NULL;  // Failure - Indicate to caller to deliver exception
-  }
-  // If we're just implementing const-class, we shouldn't call <clinit>.
-  if (!can_run_clinit) {
-    return klass;
-  }
-  // If we are the <clinit> of this class, just return our storage.
-  //
-  // Do not set the DexCache InitializedStaticStorage, since that implies <clinit> has finished
-  // running.
-  if (klass == referring_class && MethodHelper(referrer).IsClassInitializer()) {
-    return klass;
-  }
-  if (!class_linker->EnsureInitialized(klass, true, true)) {
-    CHECK(self->IsExceptionPending());
-    return NULL;  // Failure - Indicate to caller to deliver exception
-  }
-  referrer->GetDexCacheInitializedStaticStorage()->Set(type_idx, klass);
-  return klass;
-}
-
 void ThrowStackOverflowError(Thread* self) {
   CHECK(!self->IsHandlingStackOverflow()) << "Recursive stack overflow.";
 
diff --git a/src/runtime_support.h b/src/runtime_support.h
index 1c39214..5fc8da5 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -248,10 +248,41 @@
                                                   Thread* self, bool access_check, InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-extern mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
-                                             const mirror::AbstractMethod* referrer, Thread* self,
-                                             bool can_run_clinit, bool verify_access)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
+                                                    const mirror::AbstractMethod* referrer,
+                                                    Thread* self, bool can_run_clinit,
+                                                    bool verify_access)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
+  if (UNLIKELY(klass == NULL)) {
+    CHECK(self->IsExceptionPending());
+    return NULL;  // Failure - tell the caller to deliver the pending exception.
+  }
+  // Perform the access check only when the caller requested it (verify_access).
+  mirror::Class* referring_class = referrer->GetDeclaringClass();
+  if (verify_access && UNLIKELY(!referring_class->CanAccess(klass))) {
+    ThrowIllegalAccessErrorClass(referring_class, klass);
+    return NULL;  // Failure - tell the caller to deliver the pending exception.
+  }
+  // If we're just implementing const-class, we shouldn't call <clinit>.
+  if (!can_run_clinit) {
+    return klass;
+  }
+  // If the referrer is the <clinit> of this very class, return the class as-is.
+  //
+  // Do not set the DexCache InitializedStaticStorage here, since that would imply <clinit> has
+  // finished running.
+  if (klass == referring_class && MethodHelper(referrer).IsClassInitializer()) {
+    return klass;
+  }
+  if (!class_linker->EnsureInitialized(klass, true, true)) {
+    CHECK(self->IsExceptionPending());
+    return NULL;  // Failure - tell the caller to deliver the pending exception.
+  }
+  referrer->GetDexCacheInitializedStaticStorage()->Set(type_idx, klass);
+  return klass;
+}
 
 extern void ThrowStackOverflowError(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);