Performance improvements by removing a DMB and inlining.

Correct the CAS used by Mutex::Lock to have acquire rather than release
semantics, and make the release semantics of the unlock CAS explicit
(see the sketch below).
Don't issue a memory barrier in thread state transitions when there is
already a barrier associated with the mutator lock.
Force inlining of the hot thread and shared lock code, heavily used by
down calls and JNI.
Force inlining of hot mirror routines used by runtime support.
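
A minimal sketch of the ordering this change establishes, written
against std::atomic rather than the cutils android_atomic_* helpers the
patch uses (SpinLock, state_ and SetStateRelaxed are illustrative names,
not code from this change):

  #include <atomic>
  #include <cstdint>

  class SpinLock {
   public:
    // Acquire CAS on lock: later accesses may not be reordered above it.
    __attribute__ ((always_inline)) void Lock() {
      int32_t expected = 0;
      while (!state_.compare_exchange_weak(expected, 1,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed)) {
        expected = 0;  // compare_exchange_weak overwrote it on failure.
      }
    }
    // Release store on unlock: earlier accesses may not be reordered below it.
    __attribute__ ((always_inline)) void Unlock() {
      state_.store(0, std::memory_order_release);
    }
   private:
    std::atomic<int32_t> state_{0};
  };

  // The thread state transition: the CAS itself can be relaxed when it is
  // immediately bracketed by a mutator lock SharedLock or SharedUnlock,
  // because that operation already carries the needed barrier.
  std::atomic<int32_t> state_and_flags{0};

  inline void SetStateRelaxed(int32_t new_state) {
    int32_t old_val = state_and_flags.load(std::memory_order_relaxed);
    while (!state_and_flags.compare_exchange_weak(old_val, new_state,
                                                  std::memory_order_relaxed)) {
    }
  }

On ARM the barrier-carrying CAS variants pay for a dmb beside the
ldrex/strex loop, so letting the adjacent mutator lock operation supply
the ordering drops one dmb from every JNI transition, which is the DMB
the title refers to.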

Performance was measured and improved using perf and maps.

Change-Id: I012580e337143236d8b6d06c1e270183ae51083c
diff --git a/src/base/mutex-inl.h b/src/base/mutex-inl.h
new file mode 100644
index 0000000..03ec6f8
--- /dev/null
+++ b/src/base/mutex-inl.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_BASE_MUTEX_INL_H_
+#define ART_SRC_BASE_MUTEX_INL_H_
+
+#include "mutex.h"
+
+#include "cutils/atomic-inline.h"
+#include "runtime.h"
+#include "thread.h"
+
+namespace art {
+
+#define CHECK_MUTEX_CALL(call, args) CHECK_PTHREAD_CALL(call, args, name_)
+
+#if ART_USE_FUTEXES
+#include "linux/futex.h"
+#include "sys/syscall.h"
+#ifndef SYS_futex
+#define SYS_futex __NR_futex
+#endif
+static inline int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout, volatile int *uaddr2, int val3) {
+  return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
+}
+#endif  // ART_USE_FUTEXES
+
+class ScopedContentionRecorder {
+ public:
+  ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid) :
+      mutex_(mutex), blocked_tid_(blocked_tid), owner_tid_(owner_tid),
+      start_milli_time_(MilliTime()) {
+  }
+
+  ~ScopedContentionRecorder() {
+    uint64_t end_milli_time = MilliTime();
+    mutex_->RecordContention(blocked_tid_, owner_tid_, end_milli_time - start_milli_time_);
+  }
+
+ private:
+  BaseMutex* const mutex_;
+  uint64_t blocked_tid_;
+  uint64_t owner_tid_;
+  const uint64_t start_milli_time_;
+};
+
+static inline uint64_t SafeGetTid(const Thread* self) {
+  if (self != NULL) {
+    return static_cast<uint64_t>(self->GetTid());
+  } else {
+    return static_cast<uint64_t>(GetTid());
+  }
+}
+
+static inline void CheckUnattachedThread(LockLevel level) NO_THREAD_SAFETY_ANALYSIS {
+  // The check below enumerates the cases where we expect not to be able to sanity check locks
+  // on a thread. Lock checking is disabled to avoid deadlock when checking shutdown lock.
+  // TODO: tighten this check.
+  if (kDebugLocking) {
+    Runtime* runtime = Runtime::Current();
+    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown() ||
+          level == kDefaultMutexLevel || level == kRuntimeShutdownLock ||
+          level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
+  }
+}
+
+inline void BaseMutex::RegisterAsUnlocked(Thread* self) {
+  if (UNLIKELY(self == NULL)) {
+    CheckUnattachedThread(level_);
+    return;
+  }
+  if (level_ != kMonitorLock) {
+    if (kDebugLocking && !gAborting) {
+      CHECK(self->GetHeldMutex(level_) == this) << "Unlocking on unacquired mutex: " << name_;
+    }
+    self->SetHeldMutex(level_, NULL);
+  }
+}
+
+inline void ReaderWriterMutex::SharedLock(Thread* self) {
+  DCHECK(self == NULL || self == Thread::Current());
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (cur_state >= 0) {
+      // Add as an extra reader.
+      done = android_atomic_acquire_cas(cur_state, cur_state + 1, &state_) == 0;
+    } else {
+      // Owner holds it exclusively, hang up.
+      ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
+      android_atomic_inc(&num_pending_readers_);
+      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+        if (errno != EAGAIN) {
+          PLOG(FATAL) << "futex wait failed for " << name_;
+        }
+      }
+      android_atomic_dec(&num_pending_readers_);
+    }
+  } while (!done);
+#else
+  CHECK_MUTEX_CALL(pthread_rwlock_rdlock, (&rwlock_));
+#endif
+  RegisterAsLocked(self);
+  AssertSharedHeld(self);
+}
+
+inline void ReaderWriterMutex::SharedUnlock(Thread* self) {
+  DCHECK(self == NULL || self == Thread::Current());
+  AssertSharedHeld(self);
+  RegisterAsUnlocked(self);
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (LIKELY(cur_state > 0)) {
+      // Reduce state by 1.
+      done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0;
+      if (done && (cur_state - 1) == 0) {  // CAS may fail spuriously; wake only on success.
+        if (num_pending_writers_ > 0 || num_pending_readers_ > 0) {
+          // Wake any exclusive waiters as there are now no readers.
+          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+        }
+      }
+    } else {
+      LOG(FATAL) << "Unexpected state_: " << cur_state << " for " << name_;
+    }
+  } while (!done);
+#else
+  CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
+#endif
+}
+
+}  // namespace art
+
+#endif  // ART_SRC_BASE_MUTEX_INL_H_
diff --git a/src/base/mutex.cc b/src/base/mutex.cc
index d09a6a2..fa7a617 100644
--- a/src/base/mutex.cc
+++ b/src/base/mutex.cc
@@ -21,28 +21,13 @@
 
 #include "base/logging.h"
 #include "cutils/atomic.h"
+#include "cutils/atomic-inline.h"
+#include "mutex-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "utils.h"
 
-#define CHECK_MUTEX_CALL(call, args) CHECK_PTHREAD_CALL(call, args, name_)
-
-extern int pthread_mutex_lock(pthread_mutex_t* mutex) EXCLUSIVE_LOCK_FUNCTION(mutex);
-extern int pthread_mutex_unlock(pthread_mutex_t* mutex) UNLOCK_FUNCTION(1);
-extern int pthread_mutex_trylock(pthread_mutex_t* mutex) EXCLUSIVE_TRYLOCK_FUNCTION(0, mutex);
-
-#if ART_USE_FUTEXES
-#include "linux/futex.h"
-#include "sys/syscall.h"
-#ifndef SYS_futex
-#define SYS_futex __NR_futex
-#endif
-int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout, volatile int *uaddr2, int val3) {
-  return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
-}
-#endif  // ART_USE_FUTEXES
-
 namespace art {
 
 // This works on Mac OS 10.6 but hasn't been tested on older releases.
@@ -85,14 +70,6 @@
   // ...other stuff we don't care about.
 };
 
-static uint64_t SafeGetTid(const Thread* self) {
-  if (self != NULL) {
-    return static_cast<uint64_t>(self->GetTid());
-  } else {
-    return static_cast<uint64_t>(GetTid());
-  }
-}
-
 #if ART_USE_FUTEXES
 static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) {
   const long int one_sec = 1000 * 1000 * 1000;  // one second in nanoseconds.
@@ -164,18 +141,6 @@
 #endif
 }
 
-static void CheckUnattachedThread(LockLevel level) NO_THREAD_SAFETY_ANALYSIS {
-  // The check below enumerates the cases where we expect not to be able to sanity check locks
-  // on a thread. Lock checking is disabled to avoid deadlock when checking shutdown lock.
-  // TODO: tighten this check.
-  if (kDebugLocking) {
-    Runtime* runtime = Runtime::Current();
-    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown() ||
-          level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
-          level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
-  }
-}
-
 void BaseMutex::RegisterAsLocked(Thread* self) {
   if (UNLIKELY(self == NULL)) {
     CheckUnattachedThread(level_);
@@ -204,19 +169,6 @@
   }
 }
 
-void BaseMutex::RegisterAsUnlocked(Thread* self) {
-  if (UNLIKELY(self == NULL)) {
-    CheckUnattachedThread(level_);
-    return;
-  }
-  if (level_ != kMonitorLock) {
-    if (kDebugLocking && !gAborting) {
-      CHECK(self->GetHeldMutex(level_) == this) << "Unlocking on unacquired mutex: " << name_;
-    }
-    self->SetHeldMutex(level_, NULL);
-  }
-}
-
 void BaseMutex::CheckSafeToWait(Thread* self) {
   if (self == NULL) {
     CheckUnattachedThread(level_);
@@ -262,25 +214,6 @@
 #endif
 }
 
-class ScopedContentionRecorder {
- public:
-  ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid) :
-      mutex_(mutex), blocked_tid_(blocked_tid), owner_tid_(owner_tid),
-      start_milli_time_(MilliTime()) {
-  }
-
-  ~ScopedContentionRecorder() {
-    uint64_t end_milli_time = MilliTime();
-    mutex_->RecordContention(blocked_tid_, owner_tid_, end_milli_time - start_milli_time_);
-  }
-
- private:
-  BaseMutex* const mutex_;
-  uint64_t blocked_tid_;
-  uint64_t owner_tid_;
-  const uint64_t start_milli_time_;
-};
-
 void BaseMutex::DumpContention(std::ostream& os) const {
 #if CONTENTION_LOGGING
   uint32_t wait_time = wait_time_;
@@ -395,7 +328,7 @@
       int32_t cur_state = state_;
       if (cur_state == 0) {
         // Change state from 0 to 1.
-        done = android_atomic_cmpxchg(0, 1, &state_) == 0;
+        done = android_atomic_acquire_cas(0, 1, &state_) == 0;
       } else {
         // Failed to acquire, hang up.
         ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
@@ -435,7 +368,7 @@
       int32_t cur_state = state_;
       if (cur_state == 0) {
         // Change state from 0 to 1.
-        done = android_atomic_cmpxchg(0, 1, &state_) == 0;
+        done = android_atomic_acquire_cas(0, 1, &state_) == 0;
       } else {
         return false;
       }
@@ -481,7 +414,7 @@
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state to 0.
-      done = android_atomic_cmpxchg(cur_state, 0, &state_) == 0;
+      done = android_atomic_release_cas(cur_state, 0, &state_) == 0;
       if (done) { // Spurious fail?
         // Wake a contender
         if (num_contenders_ > 0) {
@@ -588,7 +521,7 @@
     int32_t cur_state = state_;
     if (cur_state == 0) {
       // Change state from 0 to -1.
-      done = android_atomic_cmpxchg(0, -1, &state_) == 0;
+      done = android_atomic_acquire_cas(0, -1, &state_) == 0;
     } else {
       // Failed to acquire, hang up.
       ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
@@ -622,7 +555,7 @@
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state from -1 to 0.
-      done = android_atomic_cmpxchg(-1, 0, &state_) == 0;
+      done = android_atomic_release_cas(-1, 0, &state_) == 0;
       if (done) { // cmpxchg may fail due to noise?
         // Wake any waiters.
         if (num_pending_readers_ > 0 || num_pending_writers_ > 0) {
@@ -649,7 +582,7 @@
     int32_t cur_state = state_;
     if (cur_state == 0) {
       // Change state from 0 to -1.
-      done = android_atomic_cmpxchg(0, -1, &state_) == 0;
+      done = android_atomic_acquire_cas(0, -1, &state_) == 0;
     } else {
       // Failed to acquire, hang up.
       timespec now_abs_ts;
@@ -690,34 +623,6 @@
 }
 #endif
 
-void ReaderWriterMutex::SharedLock(Thread* self) {
-  DCHECK(self == NULL || self == Thread::Current());
-#if ART_USE_FUTEXES
-  bool done = false;
-  do {
-    int32_t cur_state = state_;
-    if (cur_state >= 0) {
-      // Add as an extra reader.
-      done = android_atomic_cmpxchg(cur_state, cur_state + 1, &state_) == 0;
-    } else {
-      // Owner holds it exclusively, hang up.
-      ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
-      android_atomic_inc(&num_pending_readers_);
-      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
-        if (errno != EAGAIN) {
-          PLOG(FATAL) << "futex wait failed for " << name_;
-        }
-      }
-      android_atomic_dec(&num_pending_readers_);
-    }
-  } while(!done);
-#else
-  CHECK_MUTEX_CALL(pthread_rwlock_rdlock, (&rwlock_));
-#endif
-  RegisterAsLocked(self);
-  AssertSharedHeld(self);
-}
-
 bool ReaderWriterMutex::SharedTryLock(Thread* self) {
   DCHECK(self == NULL || self == Thread::Current());
 #if ART_USE_FUTEXES
@@ -726,7 +631,7 @@
     int32_t cur_state = state_;
     if (cur_state >= 0) {
       // Add as an extra reader.
-      done = android_atomic_cmpxchg(cur_state, cur_state + 1, &state_) == 0;
+      done = android_atomic_acquire_cas(cur_state, cur_state + 1, &state_) == 0;
     } else {
       // Owner holds it exclusively.
       return false;
@@ -747,32 +652,6 @@
   return true;
 }
 
-void ReaderWriterMutex::SharedUnlock(Thread* self) {
-  DCHECK(self == NULL || self == Thread::Current());
-  AssertSharedHeld(self);
-  RegisterAsUnlocked(self);
-#if ART_USE_FUTEXES
-  bool done = false;
-  do {
-    int32_t cur_state = state_;
-    if (LIKELY(cur_state > 0)) {
-      // Reduce state by 1.
-      done = android_atomic_cmpxchg(cur_state, cur_state - 1, &state_) == 0;
-      if (done && (cur_state - 1) == 0) { // cmpxchg may fail due to noise?
-        if (num_pending_writers_ > 0 || num_pending_readers_ > 0) {
-          // Wake any exclusive waiters as there are now no readers.
-          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
-        }
-      }
-    } else {
-      LOG(FATAL) << "Unexpected state_:" << cur_state << " for " << name_;
-    }
-  } while(!done);
-#else
-  CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
-#endif
-}
-
 bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
   DCHECK(self == NULL || self == Thread::Current());
   bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
diff --git a/src/base/mutex.h b/src/base/mutex.h
index 8576c03..b530b75 100644
--- a/src/base/mutex.h
+++ b/src/base/mutex.h
@@ -223,14 +223,14 @@
 #endif
 
   // Block until ReaderWriterMutex is shared or free then acquire a share on the access.
-  void SharedLock(Thread* self) SHARED_LOCK_FUNCTION();
+  void SharedLock(Thread* self) SHARED_LOCK_FUNCTION() __attribute__ ((always_inline));
   void ReaderLock(Thread* self) SHARED_LOCK_FUNCTION() { SharedLock(self); }
 
   // Try to acquire share of ReaderWriterMutex.
   bool SharedTryLock(Thread* self) EXCLUSIVE_TRYLOCK_FUNCTION(true);
 
   // Release a share of the access.
-  void SharedUnlock(Thread* self) UNLOCK_FUNCTION();
+  void SharedUnlock(Thread* self) UNLOCK_FUNCTION() __attribute__ ((always_inline));
   void ReaderUnlock(Thread* self) UNLOCK_FUNCTION() { SharedUnlock(self); }
 
   // Is the current thread the exclusive holder of the ReaderWriterMutex.
diff --git a/src/gc/garbage_collector.cc b/src/gc/garbage_collector.cc
index fbcdbaf..94daec7 100644
--- a/src/gc/garbage_collector.cc
+++ b/src/gc/garbage_collector.cc
@@ -15,6 +15,8 @@
  */
 
 #include "garbage_collector.h"
+
+#include "base/mutex-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index 40102b2..81d5e17 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -24,6 +24,7 @@
 #include "barrier.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/mutex-inl.h"
 #include "card_table.h"
 #include "card_table-inl.h"
 #include "heap.h"
diff --git a/src/gc/space_bitmap-inl.h b/src/gc/space_bitmap-inl.h
index e1fdd29..dd91403 100644
--- a/src/gc/space_bitmap-inl.h
+++ b/src/gc/space_bitmap-inl.h
@@ -18,7 +18,7 @@
 #define ART_SRC_GC_SPACE_BITMAP_INL_H_
 
 #include "base/logging.h"
-#include "cutils/atomic.h"
+#include "cutils/atomic-inline.h"
 
 namespace art {
 
diff --git a/src/jdwp/jdwp_event.cc b/src/jdwp/jdwp_event.cc
index 71e91d4..a2c10b5 100644
--- a/src/jdwp/jdwp_event.cc
+++ b/src/jdwp/jdwp_event.cc
@@ -28,7 +28,7 @@
 #include "jdwp/jdwp_expand_buf.h"
 #include "jdwp/jdwp_handler.h"
 #include "jdwp/jdwp_priv.h"
-#include "thread.h"
+#include "thread-inl.h"
 
 /*
 General notes:
diff --git a/src/jdwp/jdwp_handler.cc b/src/jdwp/jdwp_handler.cc
index aa5a8a0..dd80089 100644
--- a/src/jdwp/jdwp_handler.cc
+++ b/src/jdwp/jdwp_handler.cc
@@ -43,7 +43,7 @@
 #include "jdwp/jdwp_expand_buf.h"
 #include "jdwp/jdwp_priv.h"
 #include "runtime.h"
-#include "thread.h"
+#include "thread-inl.h"
 #include "UniquePtr.h"
 
 namespace art {
diff --git a/src/mirror/class-inl.h b/src/mirror/class-inl.h
index 7eb8601..3ca4c30 100644
--- a/src/mirror/class-inl.h
+++ b/src/mirror/class-inl.h
@@ -130,6 +130,89 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable, false);
 }
 
+inline bool Class::Implements(const Class* klass) const {
+  DCHECK(klass != NULL);
+  DCHECK(klass->IsInterface()) << PrettyClass(this);
+  // All interfaces implemented directly and by our superclass, and
+  // recursively all super-interfaces of those interfaces, are listed
+  // in iftable_, so we can just do a linear scan through that.
+  int32_t iftable_count = GetIfTableCount();
+  IfTable* iftable = GetIfTable();
+  for (int32_t i = 0; i < iftable_count; i++) {
+    if (iftable->GetInterface(i) == klass) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Determine whether "this" is assignable from "src", where both of these
+// are array classes.
+//
+// Consider an array class, e.g. Y[][], where Y is a subclass of X.
+//   Y[][]            = Y[][] --> true (identity)
+//   X[][]            = Y[][] --> true (element superclass)
+//   Y                = Y[][] --> false
+//   Y[]              = Y[][] --> false
+//   Object           = Y[][] --> true (everything is an object)
+//   Object[]         = Y[][] --> true
+//   Object[][]       = Y[][] --> true
+//   Object[][][]     = Y[][] --> false (too many []s)
+//   Serializable     = Y[][] --> true (all arrays are Serializable)
+//   Serializable[]   = Y[][] --> true
+//   Serializable[][] = Y[][] --> false (unless Y is Serializable)
+//
+// Don't forget about primitive types.
+//   Object[]         = int[] --> false
+//
+inline bool Class::IsArrayAssignableFromArray(const Class* src) const {
+  DCHECK(IsArrayClass()) << PrettyClass(this);
+  DCHECK(src->IsArrayClass()) << PrettyClass(src);
+  return GetComponentType()->IsAssignableFrom(src->GetComponentType());
+}
+
+inline bool Class::IsAssignableFromArray(const Class* src) const {
+  DCHECK(!IsInterface()) << PrettyClass(this);  // handled first in IsAssignableFrom
+  DCHECK(src->IsArrayClass()) << PrettyClass(src);
+  if (!IsArrayClass()) {
+    // If "this" is not also an array, it must be Object.
+    // src's super should be java_lang_Object, since it is an array.
+    Class* java_lang_Object = src->GetSuperClass();
+    DCHECK(java_lang_Object != NULL) << PrettyClass(src);
+    DCHECK(java_lang_Object->GetSuperClass() == NULL) << PrettyClass(src);
+    return this == java_lang_Object;
+  }
+  return IsArrayAssignableFromArray(src);
+}
+
+inline bool Class::IsSubClass(const Class* klass) const {
+  DCHECK(!IsInterface()) << PrettyClass(this);
+  DCHECK(!IsArrayClass()) << PrettyClass(this);
+  const Class* current = this;
+  do {
+    if (current == klass) {
+      return true;
+    }
+    current = current->GetSuperClass();
+  } while (current != NULL);
+  return false;
+}
+
+inline AbstractMethod* Class::FindVirtualMethodForInterface(AbstractMethod* method) const {
+  Class* declaring_class = method->GetDeclaringClass();
+  DCHECK(declaring_class != NULL) << PrettyClass(this);
+  DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
+  // TODO: cache to improve lookup speed.
+  int32_t iftable_count = GetIfTableCount();
+  IfTable* iftable = GetIfTable();
+  for (int32_t i = 0; i < iftable_count; i++) {
+    if (iftable->GetInterface(i) == declaring_class) {
+      return iftable->GetMethodArray(i)->Get(method->GetMethodIndex());
+    }
+  }
+  return NULL;
+}
+
 inline AbstractMethod* Class::FindVirtualMethodForVirtual(AbstractMethod* method) const
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(!method->GetDeclaringClass()->IsInterface());
diff --git a/src/mirror/class.cc b/src/mirror/class.cc
index 7f52d17..e3347a8 100644
--- a/src/mirror/class.cc
+++ b/src/mirror/class.cc
@@ -238,74 +238,6 @@
              new_reference_offsets, false);
 }
 
-bool Class::Implements(const Class* klass) const {
-  DCHECK(klass != NULL);
-  DCHECK(klass->IsInterface()) << PrettyClass(this);
-  // All interfaces implemented directly and by our superclass, and
-  // recursively all super-interfaces of those interfaces, are listed
-  // in iftable_, so we can just do a linear scan through that.
-  int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
-  for (int32_t i = 0; i < iftable_count; i++) {
-    if (iftable->GetInterface(i) == klass) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// Determine whether "this" is assignable from "src", where both of these
-// are array classes.
-//
-// Consider an array class, e.g. Y[][], where Y is a subclass of X.
-//   Y[][]            = Y[][] --> true (identity)
-//   X[][]            = Y[][] --> true (element superclass)
-//   Y                = Y[][] --> false
-//   Y[]              = Y[][] --> false
-//   Object           = Y[][] --> true (everything is an object)
-//   Object[]         = Y[][] --> true
-//   Object[][]       = Y[][] --> true
-//   Object[][][]     = Y[][] --> false (too many []s)
-//   Serializable     = Y[][] --> true (all arrays are Serializable)
-//   Serializable[]   = Y[][] --> true
-//   Serializable[][] = Y[][] --> false (unless Y is Serializable)
-//
-// Don't forget about primitive types.
-//   Object[]         = int[] --> false
-//
-bool Class::IsArrayAssignableFromArray(const Class* src) const {
-  DCHECK(IsArrayClass())  << PrettyClass(this);
-  DCHECK(src->IsArrayClass()) << PrettyClass(src);
-  return GetComponentType()->IsAssignableFrom(src->GetComponentType());
-}
-
-bool Class::IsAssignableFromArray(const Class* src) const {
-  DCHECK(!IsInterface()) << PrettyClass(this);  // handled first in IsAssignableFrom
-  DCHECK(src->IsArrayClass()) << PrettyClass(src);
-  if (!IsArrayClass()) {
-    // If "this" is not also an array, it must be Object.
-    // src's super should be java_lang_Object, since it is an array.
-    Class* java_lang_Object = src->GetSuperClass();
-    DCHECK(java_lang_Object != NULL) << PrettyClass(src);
-    DCHECK(java_lang_Object->GetSuperClass() == NULL) << PrettyClass(src);
-    return this == java_lang_Object;
-  }
-  return IsArrayAssignableFromArray(src);
-}
-
-bool Class::IsSubClass(const Class* klass) const {
-  DCHECK(!IsInterface()) << PrettyClass(this);
-  DCHECK(!IsArrayClass()) << PrettyClass(this);
-  const Class* current = this;
-  do {
-    if (current == klass) {
-      return true;
-    }
-    current = current->GetSuperClass();
-  } while (current != NULL);
-  return false;
-}
-
 bool Class::IsInSamePackage(const StringPiece& descriptor1, const StringPiece& descriptor2) {
   size_t i = 0;
   while (descriptor1[i] != '\0' && descriptor1[i] == descriptor2[i]) {
@@ -378,21 +310,6 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), new_class_loader, false);
 }
 
-AbstractMethod* Class::FindVirtualMethodForInterface(AbstractMethod* method) const {
-  Class* declaring_class = method->GetDeclaringClass();
-  DCHECK(declaring_class != NULL) << PrettyClass(this);
-  DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
-  // TODO cache to improve lookup speed
-  int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
-  for (int32_t i = 0; i < iftable_count; i++) {
-    if (iftable->GetInterface(i) == declaring_class) {
-      return iftable->GetMethodArray(i)->Get(method->GetMethodIndex());
-    }
-  }
-  return NULL;
-}
-
 AbstractMethod* Class::FindInterfaceMethod(const StringPiece& name, const StringPiece& signature) const {
   // Check the current class before checking the interfaces.
   AbstractMethod* method = FindDeclaredVirtualMethod(name, signature);
diff --git a/src/mirror/class.h b/src/mirror/class.h
index 843e07c..9e440b4 100644
--- a/src/mirror/class.h
+++ b/src/mirror/class.h
@@ -542,7 +542,7 @@
   // super class or interface, return the specific implementation
   // method for this class.
   AbstractMethod* FindVirtualMethodForInterface(AbstractMethod* method) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) __attribute__ ((always_inline, hot));
 
   AbstractMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& descriptor) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/src/native/java_lang_System.cc b/src/native/java_lang_System.cc
index 54ee2e9..79614ae 100644
--- a/src/native/java_lang_System.cc
+++ b/src/native/java_lang_System.cc
@@ -20,6 +20,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
 #include "scoped_thread_state_change.h"
 
 /*
diff --git a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
index c43b7e2..dea2600 100644
--- a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
+++ b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
@@ -63,19 +63,6 @@
 // FillArray entrypoint.
 extern "C" void art_handle_fill_data_from_code(void*, void*);
 
-// JNI entrypoints.
-extern void* FindNativeMethod(Thread* thread);
-extern uint32_t JniMethodStart(Thread* self);
-extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self);
-extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self);
-extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
-                                     Thread* self);
-extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
-                                                 Thread* self);
-extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
-                                                             uint32_t saved_local_ref_cookie,
-                                                             jobject locked, Thread* self);
-
 // Lock entrypoints.
 extern "C" void art_lock_object_from_code(void*);
 extern "C" void art_unlock_object_from_code(void*);
diff --git a/src/oat/runtime/mips/oat_support_entrypoints_mips.cc b/src/oat/runtime/mips/oat_support_entrypoints_mips.cc
index db773ba..9c84a8f 100644
--- a/src/oat/runtime/mips/oat_support_entrypoints_mips.cc
+++ b/src/oat/runtime/mips/oat_support_entrypoints_mips.cc
@@ -28,12 +28,13 @@
 extern "C" void* art_check_and_alloc_array_from_code_with_access_check(uint32_t, void*, int32_t);
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const Class* klass, const Class* ref_class);
+extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+                                            const mirror::Class* ref_class);
 extern "C" void art_can_put_array_element_from_code(void*, void*);
 extern "C" void art_check_cast_from_code(void*, void*);
 
 // Debug entrypoints.
-extern void DebugMe(AbstractMethod* method, uint32_t info);
+extern void DebugMe(mirror::AbstractMethod* method, uint32_t info);
 extern "C" void art_update_debugger(void*, void*, int32_t, void*);
 
 // DexCache entrypoints.
@@ -62,19 +63,6 @@
 // FillArray entrypoint.
 extern "C" void art_handle_fill_data_from_code(void*, void*);
 
-// JNI entrypoints.
-extern void* FindNativeMethod(Thread* thread);
-extern uint32_t JniMethodStart(Thread* self);
-extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self);
-extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self);
-extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
-                                     Thread* self);
-extern Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
-                                         Thread* self);
-extern Object* JniMethodEndWithReferenceSynchronized(jobject result,
-                                                     uint32_t saved_local_ref_cookie,
-                                                     jobject locked, Thread* self);
-
 // Lock entrypoints.
 extern "C" void art_lock_object_from_code(void*);
 extern "C" void art_unlock_object_from_code(void*);
@@ -115,7 +103,8 @@
 extern "C" int32_t art_string_compareto(void*, void*);
 
 // Invoke entrypoints.
-const void* UnresolvedDirectMethodTrampolineFromCode(AbstractMethod*, AbstractMethod**, Thread*,
+const void* UnresolvedDirectMethodTrampolineFromCode(mirror::AbstractMethod*,
+                                                     mirror::AbstractMethod**, Thread*,
                                                      Runtime::TrampolineType);
 extern "C" void art_invoke_direct_trampoline_with_access_check(uint32_t, void*);
 extern "C" void art_invoke_interface_trampoline(uint32_t, void*);
@@ -129,7 +118,8 @@
 extern "C" void art_test_suspend();
 
 // Throw entrypoints.
-extern void ThrowAbstractMethodErrorFromCode(AbstractMethod* method, Thread* thread, AbstractMethod** sp);
+extern void ThrowAbstractMethodErrorFromCode(mirror::AbstractMethod* method, Thread* thread,
+                                             mirror::AbstractMethod** sp);
 extern "C" void art_deliver_exception_from_code(void*);
 extern "C" void art_throw_array_bounds_from_code(int32_t index, int32_t limit);
 extern "C" void art_throw_div_zero_from_code();
diff --git a/src/oat/runtime/oat_support_entrypoints.h b/src/oat/runtime/oat_support_entrypoints.h
index a08a584..ee59df4 100644
--- a/src/oat/runtime/oat_support_entrypoints.h
+++ b/src/oat/runtime/oat_support_entrypoints.h
@@ -141,6 +141,26 @@
   void (*pThrowStackOverflowFromCode)(void*);
 };
 
+// JNI entrypoints.
+extern void* FindNativeMethod(Thread* thread) LOCKS_EXCLUDED(Locks::mutator_lock_);
+extern uint32_t JniMethodStart(Thread* self)
+    UNLOCK_FUNCTION(Locks::mutator_lock_) __attribute__ ((hot));
+extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self)
+    UNLOCK_FUNCTION(Locks::mutator_lock_) __attribute__ ((hot));
+extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self)
+    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) __attribute__ ((hot));
+extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
+                                     Thread* self)
+    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) __attribute__ ((hot));
+extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
+                                                 Thread* self)
+    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) __attribute__ ((hot));
+
+extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
+                                                             uint32_t saved_local_ref_cookie,
+                                                             jobject locked, Thread* self)
+    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) __attribute__ ((hot));
+
 // Initialize an entry point data structure.
 void InitEntryPoints(EntryPoints* points);
 
diff --git a/src/oat/runtime/support_cast.cc b/src/oat/runtime/support_cast.cc
index 71a37ef..0b1fb74 100644
--- a/src/oat/runtime/support_cast.cc
+++ b/src/oat/runtime/support_cast.cc
@@ -17,6 +17,7 @@
 #include "callee_save_frame.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
 #include "runtime_support.h"
 
 namespace art {
diff --git a/src/oat/runtime/support_jni.cc b/src/oat/runtime/support_jni.cc
index 0e21700..6799159 100644
--- a/src/oat/runtime/support_jni.cc
+++ b/src/oat/runtime/support_jni.cc
@@ -27,7 +27,7 @@
 namespace art {
 
 // Used by the JNI dlsym stub to find the native method to invoke if none is registered.
-extern void* FindNativeMethod(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_) {
+extern void* FindNativeMethod(Thread* self) {
   Locks::mutator_lock_->AssertNotHeld(self);  // We come here as Native.
   DCHECK(Thread::Current() == self);
   ScopedObjectAccess soa(self);
@@ -49,7 +49,7 @@
 }
 
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
-extern uint32_t JniMethodStart(Thread* self) UNLOCK_FUNCTION(GlobalSynchronizatio::mutator_lock_) {
+extern uint32_t JniMethodStart(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
   DCHECK(env != NULL);
   uint32_t saved_local_ref_cookie = env->local_ref_cookie;
@@ -58,8 +58,7 @@
   return saved_local_ref_cookie;
 }
 
-extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self)
-    UNLOCK_FUNCTION(Locks::mutator_lock_) {
+extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self) {
   self->DecodeJObject(to_lock)->MonitorEnter(self);
   return JniMethodStart(self);
 }
@@ -71,23 +70,21 @@
   self->PopSirt();
 }
 
-extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self)
-    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) {
+extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) {
   self->TransitionFromSuspendedToRunnable();
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
 
-extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked, Thread* self)
-    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) {
+extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
+                                     Thread* self) {
   self->TransitionFromSuspendedToRunnable();
   UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
 extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
-                                                 Thread* self)
-    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) {
+                                                 Thread* self) {
   self->TransitionFromSuspendedToRunnable();
   mirror::Object* o = self->DecodeJObject(result);  // Must decode before pop.
   PopLocalReferences(saved_local_ref_cookie, self);
@@ -103,8 +100,7 @@
 
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
                                                              uint32_t saved_local_ref_cookie,
-                                                             jobject locked, Thread* self)
-    SHARED_LOCK_FUNCTION(Locks::mutator_lock_) {
+                                                             jobject locked, Thread* self) {
   self->TransitionFromSuspendedToRunnable();
   UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
   mirror::Object* o = self->DecodeJObject(result);
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index 445ae2a..48ec5bf 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -60,19 +60,6 @@
 // FillArray entrypoint.
 extern "C" void art_handle_fill_data_from_code(void*, void*);
 
-// JNI entrypoints.
-extern void* FindNativeMethod(Thread* thread);
-extern uint32_t JniMethodStart(Thread* self);
-extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self);
-extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self);
-extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
-                                     Thread* self);
-extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
-                                                 Thread* self);
-extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
-                                                             uint32_t saved_local_ref_cookie,
-                                                             jobject locked, Thread* self);
-
 // Lock entrypoints.
 extern "C" void art_lock_object_from_code(void*);
 extern "C" void art_unlock_object_from_code(void*);
diff --git a/src/runtime_support.h b/src/runtime_support.h
index a504237..09ca0aa 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -25,6 +25,7 @@
 #include "jni_internal.h"
 #include "mirror/abstract_method.h"
 #include "mirror/array.h"
+#include "mirror/class-inl.h"
 #include "mirror/throwable.h"
 #include "object_utils.h"
 #include "thread.h"
diff --git a/src/scoped_thread_state_change.h b/src/scoped_thread_state_change.h
index 80d47c5..31f178d 100644
--- a/src/scoped_thread_state_change.h
+++ b/src/scoped_thread_state_change.h
@@ -19,7 +19,7 @@
 
 #include "base/casts.h"
 #include "jni_internal.h"
-#include "thread.h"
+#include "thread-inl.h"
 
 namespace art {
 
diff --git a/src/thread-inl.h b/src/thread-inl.h
new file mode 100644
index 0000000..93aa10e
--- /dev/null
+++ b/src/thread-inl.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_THREAD_INL_H_
+#define ART_SRC_THREAD_INL_H_
+
+#include "thread.h"
+
+#include "base/mutex-inl.h"
+#include "cutils/atomic-inline.h"
+
+namespace art {
+
+inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
+#ifdef NDEBUG
+  UNUSED(check_locks);  // Keep GCC happy about unused parameters.
+#else
+  CHECK_EQ(0u, no_thread_suspension_) << last_no_thread_suspension_cause_;
+  if (check_locks) {
+    bool bad_mutexes_held = false;
+    for (int i = kMaxMutexLevel; i >= 0; --i) {
+      // We expect no locks except the mutator_lock_.
+      if (i != kMutatorLock) {
+        BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
+        if (held_mutex != NULL) {
+          LOG(ERROR) << "holding \"" << held_mutex->GetName()
+                  << "\" at point where thread suspension is expected";
+          bad_mutexes_held = true;
+        }
+      }
+    }
+    CHECK(!bad_mutexes_held);
+  }
+#endif
+}
+
+inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
+  AssertThreadSuspensionIsAllowable();
+  DCHECK_NE(new_state, kRunnable);
+  DCHECK_EQ(this, Thread::Current());
+  // Change to non-runnable state, thereby appearing suspended to the system.
+  DCHECK_EQ(GetState(), kRunnable);
+  union StateAndFlags old_state_and_flags;
+  union StateAndFlags new_state_and_flags;
+  do {
+    old_state_and_flags = state_and_flags_;
+    // Copy over flags and try to clear the checkpoint bit if it is set.
+    new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags & ~kCheckpointRequest;
+    new_state_and_flags.as_struct.state = new_state;
+    // CAS the value without a memory barrier; the barrier occurs in the unlock below.
+  } while (UNLIKELY(android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
+                                       &state_and_flags_.as_int) != 0));
+  // If we toggled the checkpoint flag we must have cleared it.
+  uint16_t flag_change = new_state_and_flags.as_struct.flags ^ old_state_and_flags.as_struct.flags;
+  if (UNLIKELY((flag_change & kCheckpointRequest) != 0)) {
+    RunCheckpointFunction();
+  }
+  // Release share on mutator_lock_.
+  Locks::mutator_lock_->SharedUnlock(this);
+}
+
+inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
+  bool done = false;
+  union StateAndFlags old_state_and_flags = state_and_flags_;
+  int16_t old_state = old_state_and_flags.as_struct.state;
+  DCHECK_NE(static_cast<ThreadState>(old_state), kRunnable);
+  do {
+    Locks::mutator_lock_->AssertNotHeld(this);  // Otherwise we starve GC.
+    old_state_and_flags = state_and_flags_;
+    DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+    if (UNLIKELY((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0)) {
+      // Wait while our suspend count is non-zero.
+      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+      old_state_and_flags = state_and_flags_;
+      DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+      while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
+        // Re-check when Thread::resume_cond_ is notified.
+        Thread::resume_cond_->Wait(this);
+        old_state_and_flags = state_and_flags_;
+        DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+      }
+      DCHECK_EQ(GetSuspendCount(), 0);
+    }
+    // Re-acquire shared mutator_lock_ access.
+    Locks::mutator_lock_->SharedLock(this);
+    // Atomically change from suspended to runnable if no suspend request pending.
+    old_state_and_flags = state_and_flags_;
+    DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+    if (LIKELY((old_state_and_flags.as_struct.flags & kSuspendRequest) == 0)) {
+      union StateAndFlags new_state_and_flags = old_state_and_flags;
+      new_state_and_flags.as_struct.state = kRunnable;
+      // CAS the value without a memory barrier; the barrier occurred in the lock above.
+      done = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
+                                &state_and_flags_.as_int) == 0;
+    }
+    if (UNLIKELY(!done)) {
+      // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
+      Locks::mutator_lock_->SharedUnlock(this);
+    }
+  } while (UNLIKELY(!done));
+  return static_cast<ThreadState>(old_state);
+}
+
+}  // namespace art
+
+#endif  // ART_SRC_THREAD_INL_H_
diff --git a/src/thread.cc b/src/thread.cc
index 01d6072..5b1a325 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -56,6 +56,7 @@
 #include "gc/space.h"
 #include "stack.h"
 #include "stack_indirect_reference_table.h"
+#include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
 #include "verifier/dex_gc_map.h"
@@ -608,72 +609,6 @@
   VLOG(threads) << this << " self-reviving";
 }
 
-void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
-  AssertThreadSuspensionIsAllowable();
-  DCHECK_NE(new_state, kRunnable);
-  DCHECK_EQ(this, Thread::Current());
-  // Change to non-runnable state, thereby appearing suspended to the system.
-  DCHECK_EQ(GetState(), kRunnable);
-  union StateAndFlags old_state_and_flags;
-  union StateAndFlags new_state_and_flags;
-  do {
-    old_state_and_flags = state_and_flags_;
-    // Copy over flags and try to clear the checkpoint bit if it is set.
-    new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags & ~kCheckpointRequest;
-    new_state_and_flags.as_struct.state = new_state;
-  } while (android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                  &state_and_flags_.as_int) != 0);
-  // If we toggled the checkpoint flag we must have cleared it.
-  uint16_t flag_change = new_state_and_flags.as_struct.flags ^ old_state_and_flags.as_struct.flags;
-  if ((flag_change & kCheckpointRequest) != 0) {
-    RunCheckpointFunction();
-  }
-  // Release share on mutator_lock_.
-  Locks::mutator_lock_->SharedUnlock(this);
-}
-
-ThreadState Thread::TransitionFromSuspendedToRunnable() {
-  bool done = false;
-  union StateAndFlags old_state_and_flags = state_and_flags_;
-  int16_t old_state = old_state_and_flags.as_struct.state;
-  DCHECK_NE(static_cast<ThreadState>(old_state), kRunnable);
-  do {
-    Locks::mutator_lock_->AssertNotHeld(this);  // Otherwise we starve GC..
-    old_state_and_flags = state_and_flags_;
-    DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
-    if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
-      // Wait while our suspend count is non-zero.
-      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
-      old_state_and_flags = state_and_flags_;
-      DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
-      while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
-        // Re-check when Thread::resume_cond_ is notified.
-        Thread::resume_cond_->Wait(this);
-        old_state_and_flags = state_and_flags_;
-        DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
-      }
-      DCHECK_EQ(GetSuspendCount(), 0);
-    }
-    // Re-acquire shared mutator_lock_ access.
-    Locks::mutator_lock_->SharedLock(this);
-    // Atomically change from suspended to runnable if no suspend request pending.
-    old_state_and_flags = state_and_flags_;
-    DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
-    if ((old_state_and_flags.as_struct.flags & kSuspendRequest) == 0) {
-      union StateAndFlags new_state_and_flags = old_state_and_flags;
-      new_state_and_flags.as_struct.state = kRunnable;
-      done = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                    &state_and_flags_.as_int)
-                                        == 0;
-    }
-    if (!done) {
-      // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
-      Locks::mutator_lock_->SharedUnlock(this);
-    }
-  } while (!done);
-  return static_cast<ThreadState>(old_state);
-}
-
 Thread* Thread::SuspendForDebugger(jobject peer, bool request_suspension, bool* timed_out) {
   static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
   useconds_t total_delay_us = 0;
@@ -2112,25 +2047,4 @@
   return os;
 }
 
-#ifndef NDEBUG
-void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
-  CHECK_EQ(0u, no_thread_suspension_) << last_no_thread_suspension_cause_;
-  if (check_locks) {
-    bool bad_mutexes_held = false;
-    for (int i = kMaxMutexLevel; i >= 0; --i) {
-      // We expect no locks except the mutator_lock_.
-      if (i != kMutatorLock) {
-        BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
-        if (held_mutex != NULL) {
-          LOG(ERROR) << "holding \"" << held_mutex->GetName()
-                  << "\" at point where thread suspension is expected";
-          bad_mutexes_held = true;
-        }
-      }
-    }
-    CHECK(!bad_mutexes_held);
-  }
-}
-#endif
-
 }  // namespace art
diff --git a/src/thread.h b/src/thread.h
index 5e424c1..58de45d 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -169,13 +169,15 @@
   // Transition from non-runnable to runnable state acquiring share on mutator_lock_.
   ThreadState TransitionFromSuspendedToRunnable()
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCK_FUNCTION(Locks::mutator_lock_);
+      SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
+      __attribute__ ((always_inline));
 
   // Transition from runnable into a state where mutator privileges are denied. Releases share of
   // mutator lock.
   void TransitionFromRunnableToSuspended(ThreadState new_state)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      UNLOCK_FUNCTION(Locks::mutator_lock_);
+      UNLOCK_FUNCTION(Locks::mutator_lock_)
+      __attribute__ ((always_inline));
 
   // Wait for a debugger suspension on the thread associated with the given peer. Returns the
   // thread on success, else NULL. If the thread should be suspended then request_suspension should
@@ -215,13 +217,7 @@
 #endif
 
 
-#ifndef NDEBUG
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
-#else
-  void AssertThreadSuspensionIsAllowable(bool check_locks = true) const {
-    UNUSED(check_locks);  // Keep GCC happy about unused parameters.
-  }
-#endif
 
   bool IsDaemon() const {
     return daemon_;
diff --git a/src/verifier/reg_type.cc b/src/verifier/reg_type.cc
index a18c8b1..f412581 100644
--- a/src/verifier/reg_type.cc
+++ b/src/verifier/reg_type.cc
@@ -19,6 +19,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "reg_type_cache.h"