merge in mnc-mr1-release history after reset to mnc-dr-dev
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index fabaaec..b84154b 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -233,7 +233,7 @@
 
 # Cflags for debug ART and ART tools.
 art_debug_cflags := \
-  -O0 \
+  -O2 \
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
   -DVIXL_DEBUG \
   -UNDEBUG
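
Note: the debug cflags change swaps -O0 for -O2, which makes debug builds of
ART noticeably faster to run while -UNDEBUG keeps all assertions compiled in.
A minimal standalone C++ sketch of that interaction (illustrative, not ART
code):

    // assert() is controlled solely by NDEBUG, not by the optimization
    // level, so building with -O2 -UNDEBUG keeps this check active.
    #include <cassert>

    int main() {
      int x = 2 + 2;
      assert(x == 4);  // still evaluated at -O2 because NDEBUG is undefined
      return 0;
    }
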
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index f068571..7d98a30 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -526,7 +526,7 @@
   CHECK(scratch.IsXRegister()) << scratch;
   // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
   vixl::UseScratchRegisterScope temps(vixl_masm_);
-  temps.Exclude(reg_x(base.AsXRegister()));
+  temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister()));
   ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsXRegister()));
 }
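
Note: the one-line fix above excludes the scratch register, not just the base
register, from VIXL's temporary pool. Without it, the macro-assembler is free
to hand the scratch register out as a hidden temp between the Ldr and the Br,
clobbering the branch target. A hypothetical C++ sketch of the scope pattern
(stand-in types, not the real VIXL API):

    #include <cassert>
    #include <set>

    // Pool of registers the macro-assembler may grab for hidden temps.
    struct ScratchScope {
      std::set<int>& pool_;
      std::set<int> removed_;
      explicit ScratchScope(std::set<int>& pool) : pool_(pool) {}

      // Pin a register the caller is using for its own purposes.
      void Exclude(int reg) {
        if (pool_.erase(reg) != 0) removed_.insert(reg);
      }

      // Hand out a temp; a pinned register can never be returned here.
      int AcquireTemp() {
        assert(!pool_.empty());
        int reg = *pool_.begin();
        pool_.erase(pool_.begin());
        removed_.insert(reg);
        return reg;
      }

      // Restore the pool on scope exit, mirroring UseScratchRegisterScope.
      ~ScratchScope() {
        for (int reg : removed_) pool_.insert(reg);
      }
    };
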
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 56b0b79..b97c143 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -137,7 +137,7 @@
 }
 
 void Thread::CleanupCpu() {
-  MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
+  MutexLock mu(this, *Locks::modify_ldt_lock_);
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
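
Note: CleanupCpu() runs on a fully attached thread, so it can pass `this`
instead of nullptr. A non-null Thread* lets the mutex record its owner, which
is what enables lock-ordering checks and correct contention attribution; the
nullptr form is meant for threads not yet registered with the runtime. A
minimal sketch of the RAII shape, with stand-in types (ART's real Mutex does
much more):

    #include <mutex>

    struct Thread {};  // stand-in for art::Thread

    struct CheckedMutex {
      std::mutex inner;
      Thread* owner = nullptr;
      void ExclusiveLock(Thread* self) {
        // A real implementation would validate lock-level ordering here
        // whenever self != nullptr.
        inner.lock();
        owner = self;
      }
      void ExclusiveUnlock(Thread* /*self*/) {
        owner = nullptr;
        inner.unlock();
      }
    };

    class MutexLock {
     public:
      MutexLock(Thread* self, CheckedMutex& mu) : self_(self), mu_(mu) {
        mu_.ExclusiveLock(self_);
      }
      ~MutexLock() { mu_.ExclusiveUnlock(self_); }

     private:
      Thread* const self_;
      CheckedMutex& mu_;
    };
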
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 69b26eb..10ed0f4 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,11 +89,7 @@
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.card_table.
-#if defined(__LP64__)
-#define THREAD_CARD_TABLE_OFFSET 160
-#else
-#define THREAD_CARD_TABLE_OFFSET 144
-#endif
+#define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
             art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
 
@@ -108,16 +104,11 @@
             art::Thread::TopOfManagedStackOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.self.
-#if defined(__LP64__)
-#define THREAD_SELF_OFFSET 2112
-#else
-#define THREAD_SELF_OFFSET 1120
-#endif
-// (THREAD_CARD_TABLE_OFFSET + (9 * __SIZEOF_POINTER__))
+#define THREAD_SELF_OFFSET (THREAD_CARD_TABLE_OFFSET + (9 * __SIZEOF_POINTER__))
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 31 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 147 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
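
Note: these constants are consumed by hand-written assembly, and each
ADD_TEST_EQ pins the #define to the real C++ offset so the two cannot drift
apart. Moving `self` and the entrypoint tables into the pointer-sized block
(see the thread.h hunks below) is what lets the per-ABI #if/#else blocks
collapse into single expressions. A minimal sketch of the same compile-time
guard in standard C++ (hypothetical layout, not ART's):

    #include <cstddef>

    struct TlsPtrSized {
      void* card_table;
      void* exception;
      // ... more pointer-sized slots ...
    };

    struct ThreadLike {
      char tls32_and_64[128];  // everything before the pointer-sized block
      TlsPtrSized tlsPtr_;
    };

    // The assembly-visible constant ...
    #define CARD_TABLE_OFFSET 128

    // ... and the check that keeps it honest against the real layout.
    static_assert(offsetof(ThreadLike, tlsPtr_) +
                      offsetof(TlsPtrSized, card_table) == CARD_TABLE_OFFSET,
                  "asm constant out of sync with the C++ layout");
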
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 2ae3b19..87840e7 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -74,9 +74,7 @@
           // Ignore logging which may or may not have set up thread data structures.
           level == kLoggingLock ||
           // Avoid recursive death.
-          level == kAbortLock ||
-          // A MemMap may be created for thread objects
-          level == kMemMapsLock) << level;
+          level == kAbortLock) << level;
   }
 }
 
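
Note: the kMemMapsLock exemption existed only because the now-reverted
write-protect scheme allocated a MemMap for each Thread object before the
thread was registered. With threads back on plain new/delete, an unattached
thread taking kMemMapsLock is a bug again, so the whitelist shrinks. A tiny
sketch of the invariant being enforced (assuming a LockLevel enum like ART's):

    enum LockLevel { kLoggingLock, kAbortLock, kMemMapsLock, kLockLevelCount };

    // Only locks that may legitimately be taken before a Thread is attached
    // belong on this whitelist; kMemMapsLock is no longer one of them.
    void CheckUnattachedLock(LockLevel level) {
      bool allowed = (level == kLoggingLock) || (level == kAbortLock);
      if (!allowed) __builtin_trap();
    }
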
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 73db6ba..2ea5cb0 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1949,7 +1949,6 @@
                                      ArtMethod* caller_method, Thread* self, ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
   DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
-  DCHECK(caller_method != nullptr);
   ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == nullptr)) {
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()->GetDexFile();
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index e13822a..6566060 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -511,29 +511,26 @@
   if (base_begin_ == nullptr && base_size_ == 0) {
     return;
   }
-
-  // Remove it from maps_.
-  {
-    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-    bool found = false;
-    DCHECK(maps_ != nullptr);
-    for (auto it = maps_->lower_bound(base_begin_), end = maps_->end();
-        it != end && it->first == base_begin_; ++it) {
-      if (it->second == this) {
-        found = true;
-        maps_->erase(it);
-        break;
-      }
-    }
-    CHECK(found) << "MemMap not found";
-  }
-
   if (!reuse_) {
     int result = munmap(base_begin_, base_size_);
     if (result == -1) {
       PLOG(FATAL) << "munmap failed";
     }
   }
+
+  // Remove it from maps_.
+  MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
+  bool found = false;
+  DCHECK(maps_ != nullptr);
+  for (auto it = maps_->lower_bound(base_begin_), end = maps_->end();
+       it != end && it->first == base_begin_; ++it) {
+    if (it->second == this) {
+      found = true;
+      maps_->erase(it);
+      break;
+    }
+  }
+  CHECK(found) << "MemMap not found";
 }
 
 MemMap::MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin,
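
Note: the destructor goes back to unmapping first and erasing the bookkeeping
entry afterwards, holding mem_maps_lock_ only for the multimap erase. Because
maps_ is a multimap keyed by base address, the loop has to match on the value
as well as the key before erasing. A condensed sketch of that lookup-and-erase
pattern:

    #include <map>

    // Several MemMaps can transiently share a base address, so erase exactly
    // the entry whose value is `self`, not merely the first matching key.
    template <typename MemMapT>
    bool EraseSelf(std::multimap<void*, MemMapT*>& maps, void* base, MemMapT* self) {
      for (auto it = maps.lower_bound(base), end = maps.end();
           it != end && it->first == base; ++it) {
        if (it->second == self) {
          maps.erase(it);
          return true;
        }
      }
      return false;  // the caller CHECKs this: the map must be registered
    }
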
diff --git a/runtime/thread.cc b/runtime/thread.cc
index a9173d5..6e8f89c 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -96,11 +96,9 @@
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
-  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.quick_entrypoints);
-  uintptr_t* end = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(&tlsPtr_.interpreter_entrypoints) +
-      sizeof(tlsPtr_.interpreter_entrypoints));
-  DCHECK_LT(begin, end);
+  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.interpreter_entrypoints);
+  uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
+      sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
@@ -108,90 +106,7 @@
                   &tlsPtr_.quick_entrypoints);
 }
 
-static constexpr bool kUseWriteProtectScheme = true;
-
-static size_t GetProtectionOffset() {
-  return RoundUp(QUICK_ENTRYPOINT_OFFSET(sizeof(void*), pInstanceofNonTrivial).Uint32Value(), 16);
-}
-
-// Allocate a thread. This might do some magic to use two pages.
-Thread* Thread::AllocateThread(bool is_daemon) {
-  if (!kUseWriteProtectScheme) {
-    return new Thread(is_daemon);
-  }
-
-  std::string error_msg;
-  MemMap* mem_map = MemMap::MapAnonymous("some thread",
-                                         nullptr,
-                                         2 * kPageSize,
-                                         PROT_READ | PROT_WRITE,
-                                         false,
-                                         false,
-                                         &error_msg);
-  if (mem_map == nullptr) {
-    PLOG(FATAL) << error_msg;
-  }
-
-  uint8_t* second_page_address = mem_map->Begin() + kPageSize;
-  const uint32_t offset = GetProtectionOffset();
-  uintptr_t start_address = reinterpret_cast<uintptr_t>(second_page_address) - offset;
-  DCHECK_GE(start_address, reinterpret_cast<uintptr_t>(mem_map->Begin()) + sizeof(void*));
-  void* start_address_ptr = reinterpret_cast<void*>(start_address);
-  Thread* t = new (start_address_ptr) Thread(is_daemon);
-
-  // Store a pointer to the MemMap at the bottom.
-  *reinterpret_cast<MemMap**>(mem_map->Begin()) = mem_map;
-
-  return t;
-}
-
-static void ProtectThread(Thread* thread) {
-  if (!kUseWriteProtectScheme) {
-    return;
-  }
-
-  uintptr_t thread_addr = reinterpret_cast<uintptr_t>(thread);
-  DCHECK_EQ(RoundUp(thread_addr, kPageSize), thread_addr + GetProtectionOffset());
-  void* page_address = reinterpret_cast<void*>(RoundUp(thread_addr, kPageSize));
-  mprotect(page_address, kPageSize, PROT_READ);
-}
-
-static void UnprotectThread(Thread* thread) {
-  if (!kUseWriteProtectScheme) {
-    return;
-  }
-
-  uintptr_t thread_addr = reinterpret_cast<uintptr_t>(thread);
-  DCHECK_EQ(RoundUp(thread_addr, kPageSize), thread_addr + GetProtectionOffset());
-  void* page_address = reinterpret_cast<void*>(RoundUp(thread_addr, kPageSize));
-  mprotect(page_address, kPageSize, PROT_READ | PROT_WRITE);
-}
-
-void Thread::DeleteThread(Thread* thread) {
-  if (!kUseWriteProtectScheme) {
-    delete thread;
-    return;
-  }
-
-  if (thread == nullptr) {
-    return;
-  }
-
-  UnprotectThread(thread);
-  thread->~Thread();
-
-  // There should be the MemMap* at the bottom.
-  MemMap* mem_map =
-      *reinterpret_cast<MemMap**>(RoundDown(reinterpret_cast<uintptr_t>(thread), kPageSize));
-
-  delete mem_map;
-}
-
 void Thread::InitStringEntryPoints() {
-  // Ensure things are writable. This may be a late initialization of the entrypoints for the main
-  // thread.
-  UnprotectThread(this);
-
   ScopedObjectAccess soa(this);
   QuickEntryPoints* qpoints = &tlsPtr_.quick_entrypoints;
   qpoints->pNewEmptyString = reinterpret_cast<void(*)()>(
@@ -226,9 +141,6 @@
       soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuffer));
   qpoints->pNewStringFromStringBuilder = reinterpret_cast<void(*)()>(
       soa.DecodeMethod(WellKnownClasses::java_lang_StringFactory_newStringFromStringBuilder));
-
-  // This is a good time to protect things, now that all entrypoints are set.
-  ProtectThread(this);
 }
 
 void Thread::ResetQuickAllocEntryPointsForThread() {
@@ -494,7 +406,7 @@
     return;
   }
 
-  Thread* child_thread = AllocateThread(is_daemon);
+  Thread* child_thread = new Thread(is_daemon);
   // Use global JNI ref to hold peer live while child thread starts.
   child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer);
   stack_size = FixStackSize(stack_size);
@@ -542,7 +454,7 @@
   // Manually delete the global reference since Thread::Init will not have been run.
   env->DeleteGlobalRef(child_thread->tlsPtr_.jpeer);
   child_thread->tlsPtr_.jpeer = nullptr;
-  DeleteThread(child_thread);
+  delete child_thread;
   child_thread = nullptr;
   // TODO: remove from thread group?
   env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
@@ -613,11 +525,11 @@
       return nullptr;
     } else {
       Runtime::Current()->StartThreadBirth();
-      self = AllocateThread(as_daemon);
+      self = new Thread(as_daemon);
       bool init_success = self->Init(runtime->GetThreadList(), runtime->GetJavaVM());
       Runtime::Current()->EndThreadBirth();
       if (!init_success) {
-        DeleteThread(self);
+        delete self;
         return nullptr;
       }
     }
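
Note: the bulk of this file reverts an experimental write-protect scheme in
which each Thread was placement-new'ed into a two-page MemMap so the page
holding the entrypoint tables could be flipped read-only after initialization;
threads return to plain new/delete. What survives is the placeholder fill in
InitTlsEntryPoints, which now sweeps from interpreter_entrypoints through
quick_entrypoints and therefore depends on the three tables being declared
contiguously and in that order (see the thread.h hunks below). A standalone
sketch of the sweep, with stand-in entrypoint tables:

    #include <cstdint>

    // Stand-ins for InterpreterEntryPoints, JniEntryPoints and
    // QuickEntryPoints, declared back to back as in tls_ptr_sized_values.
    struct Interp { void (*f0)(); };
    struct Jni    { void (*f1)(); };
    struct Quick  { void (*f2)(); void (*f3)(); };

    struct TlsPtr {
      Interp interpreter_entrypoints;
      Jni jni_entrypoints;
      Quick quick_entrypoints;
    };

    extern "C" void UnimplementedEntryPoint() { __builtin_trap(); }

    // Fill every pointer-sized slot from the first table to the end of the
    // last; any slot never overwritten with a real entrypoint traps loudly
    // when called. (Assumes no padding between the tables, as in ART.)
    void FillPlaceholders(TlsPtr* tls) {
      uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tls->interpreter_entrypoints);
      uintptr_t* end = reinterpret_cast<uintptr_t*>(
          reinterpret_cast<uint8_t*>(&tls->quick_entrypoints) +
          sizeof(tls->quick_entrypoints));
      for (uintptr_t* it = begin; it != end; ++it) {
        *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
      }
    }
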
diff --git a/runtime/thread.h b/runtime/thread.h
index eb1809d..0e71c08 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -531,8 +531,7 @@
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
-    size_t base =  /* OFFSETOF_MEMBER(Thread, tlsPtr_); */
-        pointer_size == 8u ? 160 : 144;
+    size_t base = OFFSETOF_MEMBER(Thread, tlsPtr_);
     size_t scale;
     size_t shrink;
     if (pointer_size == sizeof(void*)) {
@@ -952,8 +951,6 @@
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
                            Locks::thread_suspend_count_lock_);
   void Destroy();
-  static Thread* AllocateThread(bool is_daemon);
-  static void DeleteThread(Thread* thread);
 
   void CreatePeer(const char* name, bool as_daemon, jobject thread_group);
 
@@ -1135,31 +1132,19 @@
     RuntimeStats stats;
   } tls64_;
 
-  // Guards the 'interrupted_' and 'wait_monitor_' members.
-  Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
-  // Condition variable waited upon during a wait.
-  ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
-  // Pointer to the monitor lock we're currently waiting on or null if not waiting.
-  Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
-
-  // Thread "interrupted" status; stays raised until queried or thrown.
-  bool interrupted_ GUARDED_BY(wait_mutex_);
-
-  struct PACKED(sizeof(void*)) tls_ptr_sized_values {
-    tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
-        managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
-        opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
-        stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
-        top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
-        instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
-        stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
-        name(nullptr), pthread_self(0),
-        last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
-        thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
-        thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
-        nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
-        self(nullptr) {
+  struct PACKED(4) tls_ptr_sized_values {
+      tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
+      managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
+      self(nullptr), opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
+      stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr),
+      top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr),
+      instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
+      stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr),
+      name(nullptr), pthread_self(0),
+      last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
+      thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
+      nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr) {
       std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
 
@@ -1187,6 +1172,11 @@
     // created thread.
     JNIEnvExt* tmp_jni_env;
 
+    // Initialized to "this". On certain architectures (such as x86) reading off of Thread::Current
+    // is easy but getting the address of Thread::Current is hard. This field can be read off of
+    // Thread::Current to give the address.
+    Thread* self;
+
     // Our managed peer (an instance of java.lang.Thread). The jobject version is used during thread
     // start up, until the thread is registered and the local opeer_ is used.
     mirror::Object* opeer;
@@ -1248,6 +1238,12 @@
     // Locks::thread_suspend_count_lock_.
     Closure* checkpoint_functions[kMaxCheckpoints];
 
+    // Entrypoint function pointers.
+    // TODO: move this to more of a global offset table model to avoid per-thread duplication.
+    InterpreterEntryPoints interpreter_entrypoints;
+    JniEntryPoints jni_entrypoints;
+    QuickEntryPoints quick_entrypoints;
+
     // Thread-local allocation pointer.
     uint8_t* thread_local_start;
     uint8_t* thread_local_pos;
@@ -1272,19 +1268,19 @@
 
     // Current method verifier, used for root marking.
     verifier::MethodVerifier* method_verifier;
-
-    // Entrypoint function pointers.
-    // TODO: move this to more of a global offset table model to avoid per-thread duplication.
-    QuickEntryPoints quick_entrypoints;
-    JniEntryPoints jni_entrypoints;
-    InterpreterEntryPoints interpreter_entrypoints;
-
-    // Initialized to "this". On certain architectures (such as x86) reading off of Thread::Current
-    // is easy but getting the address of Thread::Current is hard. This field can be read off of
-    // Thread::Current to give the address.
-    Thread* self;
   } tlsPtr_;
 
+  // Guards the 'interrupted_' and 'wait_monitor_' members.
+  Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // Condition variable waited upon during a wait.
+  ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
+  // Pointer to the monitor lock we're currently waiting on or null if not waiting.
+  Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
+
+  // Thread "interrupted" status; stays raised until queried or thrown.
+  bool interrupted_ GUARDED_BY(wait_mutex_);
+
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
   friend class Runtime;  // For CreatePeer.
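
Note: this reordering puts `self` and the entrypoint tables near the front of
tlsPtr_ (small, fixed offsets for assembly) and moves wait_mutex_, wait_cond_,
wait_monitor_ and interrupted_ to after tlsPtr_, so the pointer-sized block
starts right after tls64_ and every field in it is exactly one pointer wide.
That uniformity is what lets ThreadOffsetFromTlsPtr go back to OFFSETOF_MEMBER
plus a scale/shrink step instead of the hard-coded 160/144: a 32-bit compiler
can compute 64-bit target offsets by rescaling the slot index. A minimal
sketch of that rescaling (hypothetical helper expressing the same idea):

    #include <cstddef>

    // With every slot exactly one pointer wide, a byte offset within the
    // block converts between ABIs via its slot index.
    constexpr std::size_t RescaleOffset(std::size_t offset_in_block,
                                        std::size_t host_ptr_size,
                                        std::size_t target_ptr_size) {
      return offset_in_block / host_ptr_size * target_ptr_size;
    }

    // The 9th pointer slot on a 32-bit host (36 bytes in) is the 9th slot on
    // a 64-bit target (72 bytes in).
    static_assert(RescaleOffset(9 * 4, 4, 8) == 9 * 8, "slot index preserved");
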
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 7d49112..b697b43 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1148,7 +1148,7 @@
     }
     // We failed to remove the thread due to a suspend request, loop and try again.
   }
-  Thread::DeleteThread(self);
+  delete self;
 
   // Release the thread ID after the thread is finished and deleted to avoid cases where we can
   // temporarily have multiple threads with the same thread id. When this occurs, it causes