ART: Add option to ensure deterministic compilation

To ensure reliable stable generation of a boot image, add a flag
for forced determinism, trading compile time for a deterministic
output.

We have to run certain passes in the compiler-driver single-threaded.
It is also necessary to try to make the heap layout deterministic.
Switch to nonconcurrent GC, use the free-list implementation for LOS,
and try to allocate the main space at a known location. This is best
effort at the moment.

To allow the compiler phase to be parallelized, const-strings need
to be created beforehand.

The identity hashcode seed needs to be pinned.

Besides the Dex object we also need to null the DexFile pointer in
dex caches.

For classes, we need to remove the clinit thread ID.

Fix oatdump alignment padding accounting.

Bug: 26687569
Change-Id: Ia82120e8f715bb3691d861817b12778ac677355a
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index c5df134..0cd41bb 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -71,6 +71,7 @@
       nullptr,
       false,
       "",
+      false,
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index d63878d..efdc333 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -54,6 +54,7 @@
         nullptr,
         false,
         "",
+        false,
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 818d50a..440fe95 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -39,6 +39,7 @@
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
@@ -365,7 +366,7 @@
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
       had_hard_verifier_failure_(false),
-      thread_count_(thread_count),
+      parallel_thread_count_(thread_count),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
@@ -435,24 +436,27 @@
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  std::unique_ptr<ThreadPool> thread_pool(
-      new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+
+  InitializeThreadPools();
+
   VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false);
   // Precompile:
   // 1) Load image classes
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, thread_pool.get(), timings);
+  PreCompile(class_loader, dex_files, timings);
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
   if (!GetCompilerOptions().VerifyAtRuntime()) {
-    Compile(class_loader, dex_files, thread_pool.get(), timings);
+    Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
     stats_->Dump();
   }
+
+  FreeThreadPools();
 }
 
 static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
@@ -653,8 +657,9 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
-  PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
+  InitializeThreadPools();
+
+  PreCompile(jclass_loader, dex_files, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -677,20 +682,147 @@
                 true,
                 dex_cache);
 
+  FreeThreadPools();
+
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Resolve(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  // Resolution allocates classes and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* resolve_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t resolve_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    ResolveDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   resolve_thread_pool,
+                   resolve_thread_count,
+                   timings);
   }
 }
 
-void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool* thread_pool, TimingLogger* timings) {
+// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right now
+// this is single-threaded for simplicity.
+// TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
+//       stable order.
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const DexFile& dex_file,
+                                const DexFile::CodeItem* code_item) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  const uint16_t* code_ptr = code_item->insns_;
+  const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+
+  while (code_ptr < code_end) {
+    const Instruction* inst = Instruction::At(code_ptr);
+    switch (inst->Opcode()) {
+      case Instruction::CONST_STRING: {
+        uint32_t string_index = inst->VRegB_21c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+      case Instruction::CONST_STRING_JUMBO: {
+        uint32_t string_index = inst->VRegB_31c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+
+      default:
+        break;
+    }
+
+    code_ptr += inst->SizeInCodeUnits();
+  }
+}
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  for (const DexFile* dex_file : dex_files) {
+    TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
+
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // empty class, probably a marker interface
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped, do not resolve const-string in code of this class.
+        // TODO: Make sure that inlining honors this.
+        continue;
+      }
+
+      // Direct methods.
+      int64_t previous_direct_method_idx = -1;
+      while (it.HasNextDirectMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_direct_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_direct_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      // Virtual methods.
+      int64_t previous_virtual_method_idx = -1;
+      while (it.HasNextVirtualMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_virtual_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_virtual_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
+  }
+}
+
+inline void CompilerDriver::CheckThreadPools() {
+  DCHECK(parallel_thread_pool_ != nullptr);
+  DCHECK(single_thread_pool_ != nullptr);
+}
+
+void CompilerDriver::PreCompile(jobject class_loader,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  CheckThreadPools();
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
@@ -700,20 +832,26 @@
   // We need to resolve for never_verify since it needs to run dex to dex to add the
   // RETURN_VOID_NO_BARRIER.
   if (never_verify || verification_enabled) {
-    Resolve(class_loader, dex_files, thread_pool, timings);
+    Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }
 
   if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
-    SetVerified(class_loader, dex_files, thread_pool, timings);
+    SetVerified(class_loader, dex_files, timings);
   }
 
   if (!verification_enabled) {
     return;
   }
 
-  Verify(class_loader, dex_files, thread_pool, timings);
+  if (GetCompilerOptions().IsForceDeterminism() && IsBootImage()) {
+    // Resolve strings from const-string. Do this now to have a deterministic image.
+    ResolveConstStrings(this, dex_files, timings);
+    VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
+  }
+
+  Verify(class_loader, dex_files, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -721,7 +859,7 @@
                << "situations. Please check the log.";
   }
 
-  InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  InitializeClasses(class_loader, dex_files, timings);
   VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
 
   UpdateImageClasses(timings);
@@ -1759,6 +1897,9 @@
 
     // Wait for all the worker threads to finish.
     thread_pool_->Wait(self, true, false);
+
+    // And stop the workers accepting jobs.
+    thread_pool_->StopWorkers(self);
   }
 
   size_t NextIndex() {
@@ -1995,9 +2136,12 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::ResolveDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -2010,27 +2154,43 @@
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
     ResolveTypeVisitor visitor(&context);
-    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count_);
+    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count);
   }
 
   TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings);
   ResolveClassFieldsAndMethodsVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
-void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                 ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::SetVerified(jobject class_loader,
+                                 const std::vector<const DexFile*>& dex_files,
+                                 TimingLogger* timings) {
+  // This can be run in parallel.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    SetVerifiedDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    SetVerifiedDexFile(class_loader,
+                       *dex_file,
+                       dex_files,
+                       parallel_thread_pool_.get(),
+                       parallel_thread_count_,
+                       timings);
   }
 }
 
-void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Verify(jobject class_loader,
+                            const std::vector<const DexFile*>& dex_files,
+                            TimingLogger* timings) {
+  // Note: verification should not be pulling in classes anymore when compiling the boot image,
+  //       as all should have been resolved before. As such, doing this in parallel should still
+  //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    VerifyDexFile(class_loader,
+                  *dex_file,
+                  dex_files,
+                  parallel_thread_pool_.get(),
+                  parallel_thread_count_,
+                  timings);
   }
 }
 
@@ -2104,15 +2264,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::VerifyDexFile(jobject class_loader,
+                                   const DexFile& dex_file,
                                    const std::vector<const DexFile*>& dex_files,
-                                   ThreadPool* thread_pool, TimingLogger* timings) {
+                                   ThreadPool* thread_pool,
+                                   size_t thread_count,
+                                   TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   VerifyClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class SetVerifiedClassVisitor : public CompilationVisitor {
@@ -2162,15 +2325,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::SetVerifiedDexFile(jobject class_loader,
+                                        const DexFile& dex_file,
                                         const std::vector<const DexFile*>& dex_files,
-                                        ThreadPool* thread_pool, TimingLogger* timings) {
+                                        ThreadPool* thread_pool,
+                                        size_t thread_count,
+                                        TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   SetVerifiedClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class InitializeClassVisitor : public CompilationVisitor {
@@ -2271,31 +2437,37 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+void CompilerDriver::InitializeClasses(jobject jni_class_loader,
+                                       const DexFile& dex_file,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
+
+  // Initialization allocates objects and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* init_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t init_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
-                                     thread_pool);
-  size_t thread_count;
+                                     init_thread_pool);
   if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
-    thread_count = 1U;
-  } else {
-    thread_count = thread_count_;
+    init_thread_count = 1U;
   }
   InitializeClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count);
 }
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, timings);
   }
   if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
@@ -2303,8 +2475,9 @@
   }
 }
 
-void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Compile(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] " <<
         ((profile_compilation_info_ == nullptr)
@@ -2314,7 +2487,12 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    CompileDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
   }
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
@@ -2421,14 +2599,17 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::CompileDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, dex_files, thread_pool);
   CompileClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
@@ -2589,4 +2770,16 @@
   return true;
 }
 
+void CompilerDriver::InitializeThreadPools() {
+  size_t parallel_count = parallel_thread_count_ > 0 ? parallel_thread_count_ - 1 : 0;
+  parallel_thread_pool_.reset(
+      new ThreadPool("Compiler driver thread pool", parallel_count));
+  single_thread_pool_.reset(new ThreadPool("Single-threaded Compiler driver thread pool", 0));
+}
+
+void CompilerDriver::FreeThreadPools() {
+  parallel_thread_pool_.reset();
+  single_thread_pool_.reset();
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 6a2f7bf..5e35cbb 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -411,7 +411,7 @@
   }
 
   size_t GetThreadCount() const {
-    return thread_count_;
+    return parallel_thread_count_;
   }
 
   bool GetDumpStats() const {
@@ -550,8 +550,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool* thread_pool, TimingLogger* timings)
+  void PreCompile(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
@@ -559,49 +560,71 @@
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
-  void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings)
+  void Resolve(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
-  void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+  void ResolveDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool* thread_pool, TimingLogger* timings);
-  void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+  void Verify(jobject class_loader,
+              const std::vector<const DexFile*>& dex_files,
+              TimingLogger* timings);
+  void VerifyDexFile(jobject class_loader,
+                     const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
-                     ThreadPool* thread_pool, TimingLogger* timings)
+                     ThreadPool* thread_pool,
+                     size_t thread_count,
+                     TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                   ThreadPool* thread_pool, TimingLogger* timings);
-  void SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+  void SetVerified(jobject class_loader,
+                   const std::vector<const DexFile*>& dex_files,
+                   TimingLogger* timings);
+  void SetVerifiedDexFile(jobject class_loader,
+                          const DexFile& dex_file,
                           const std::vector<const DexFile*>& dex_files,
-                          ThreadPool* thread_pool, TimingLogger* timings)
+                          ThreadPool* thread_pool,
+                          size_t thread_count,
+                          TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
-  void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+  void InitializeClasses(jobject class_loader,
                          const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
+                         TimingLogger* timings)
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
+  void InitializeClasses(jobject class_loader,
+                         const DexFile& dex_file,
+                         const std::vector<const DexFile*>& dex_files,
+                         TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings);
-  void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+  void Compile(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings);
+  void CompileDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
   bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
 
+  void InitializeThreadPools();
+  void FreeThreadPools();
+  void CheckThreadPools();
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -652,7 +675,12 @@
 
   bool had_hard_verifier_failure_;
 
-  size_t thread_count_;
+  // A thread pool that can (potentially) run tasks in parallel.
+  std::unique_ptr<ThreadPool> parallel_thread_pool_;
+  size_t parallel_thread_count_;
+
+  // A thread pool that guarantees running single-threaded on the main thread.
+  std::unique_ptr<ThreadPool> single_thread_pool_;
 
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 2644528..d5667ad 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -46,7 +46,8 @@
       abort_on_hard_verifier_failure_(false),
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
-      dump_cfg_append_(false) {
+      dump_cfg_append_(false),
+      force_determinism_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -75,7 +76,8 @@
                                  std::ostream* init_failure_output,
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
-                                 bool dump_cfg_append
+                                 bool dump_cfg_append,
+                                 bool force_determinism
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -100,7 +102,8 @@
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
-    dump_cfg_append_(dump_cfg_append) {
+    dump_cfg_append_(dump_cfg_append),
+    force_determinism_(force_determinism) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index d47fc2a..1a9bf4b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -85,7 +85,8 @@
                   std::ostream* init_failure_output,
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
-                  bool dump_cfg_append);
+                  bool dump_cfg_append,
+                  bool force_determinism);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -234,6 +235,10 @@
     return dump_cfg_append_;
   }
 
+  bool IsForceDeterminism() const {
+    return force_determinism_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -286,6 +291,10 @@
   std::string dump_cfg_file_name_;
   bool dump_cfg_append_;
 
+  // Whether the compiler should trade performance for determinism to guarantee exactly reproducable
+  // outcomes.
+  bool force_determinism_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 72c615e..c8720ea 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1866,6 +1866,9 @@
   orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
+
+  // Remove the clinitThreadId. This is required for image determinism.
+  copy->SetClinitThreadId(static_cast<pid_t>(0));
 }
 
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
@@ -1993,6 +1996,10 @@
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
+
+  // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
+  // compiler pointers in here will make the output non-deterministic.
+  copy_dex_cache->SetDexFile(nullptr);
 }
 
 const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3a3275a..478d169 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -112,7 +112,8 @@
       /* init_failure_output */ nullptr,
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false));
+      /* dump_cfg_append */ false,
+      /* force_determinism */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 86f51e1..6c4c994 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -554,7 +554,8 @@
       dump_slow_timing_(kIsDebugBuild),
       swap_fd_(-1),
       app_image_fd_(kInvalidFd),
-      timings_(timings) {}
+      timings_(timings),
+      force_determinism_(false) {}
 
   ~Dex2Oat() {
     // Log completion time before deleting the runtime_, because this accesses
@@ -916,6 +917,12 @@
 
     // Fill some values into the key-value store for the oat header.
     key_value_store_.reset(new SafeMap<std::string, std::string>());
+
+    // Automatically force determinism for the boot image in a host build.
+    if (!kIsTargetBuild && IsBootImage()) {
+      force_determinism_ = true;
+    }
+    compiler_options_->force_determinism_ = force_determinism_;
   }
 
   void ExpandOatAndImageFilenames() {
@@ -1159,6 +1166,8 @@
         multi_image_ = true;
       } else if (option.starts_with("--no-inline-from=")) {
         no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
+      } else if (option == "--force-determinism") {
+        force_determinism_ = true;
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
@@ -2105,6 +2114,21 @@
     // foreground collector by default for dex2oat.
     raw_options.push_back(std::make_pair("-XX:DisableHSpaceCompactForOOM", nullptr));
 
+    // If we're asked to be deterministic, ensure non-concurrent GC for determinism. Also
+    // force the free-list implementation for large objects.
+    if (compiler_options_->IsForceDeterminism()) {
+      raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr));
+      raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr));
+
+      // We also need to turn off the nonmoving space. For that, we need to disable HSpace
+      // compaction (done above) and ensure that neither foreground nor background collectors
+      // are concurrent.
+      raw_options.push_back(std::make_pair("-XX:BackgroundGC=nonconcurrent", nullptr));
+
+      // To make identity hashcode deterministic, set a known seed.
+      mirror::Object::SetHashCodeSeed(987654321U);
+    }
+
     if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
       LOG(ERROR) << "Failed to parse runtime options";
       return false;
@@ -2406,6 +2430,9 @@
   // Backing storage.
   std::vector<std::string> char_backing_storage_;
 
+  // See CompilerOptions.force_determinism_.
+  bool force_determinism_;
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 7b9ce5b..d32fed7 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1656,7 +1656,7 @@
       stats_.file_bytes = file->GetLength();
     }
     size_t header_bytes = sizeof(ImageHeader);
-    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+    const auto& object_section = image_header_.GetImageSection(ImageHeader::kSectionObjects);
     const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields);
     const auto& method_section = image_header_.GetMethodsSection();
     const auto& dex_cache_arrays_section = image_header_.GetImageSection(
@@ -1665,17 +1665,46 @@
         ImageHeader::kSectionInternedStrings);
     const auto& class_table_section = image_header_.GetImageSection(
         ImageHeader::kSectionClassTable);
+    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+
     stats_.header_bytes = header_bytes;
-    stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
-    // Add padding between the field and method section.
-    // (Field section is 4-byte aligned, method section is 8-byte aligned on 64-bit targets.)
-    stats_.alignment_bytes += method_section.Offset() -
-        (field_section.Offset() + field_section.Size());
-    // Add padding between the dex cache arrays section and the intern table. (Dex cache
-    // arrays section is 4-byte aligned on 32-bit targets, intern table is 8-byte aligned.)
-    stats_.alignment_bytes += intern_section.Offset() -
-        (dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size());
-    stats_.alignment_bytes += bitmap_section.Offset() - image_header_.GetImageSize();
+
+    // Objects are kObjectAlignment-aligned.
+    // CHECK_EQ(RoundUp(header_bytes, kObjectAlignment), object_section.Offset());
+    if (object_section.Offset() > header_bytes) {
+      stats_.alignment_bytes += object_section.Offset() - header_bytes;
+    }
+
+    // Field section is 4-byte aligned.
+    constexpr size_t kFieldSectionAlignment = 4U;
+    uint32_t end_objects = object_section.Offset() + object_section.Size();
+    CHECK_EQ(RoundUp(end_objects, kFieldSectionAlignment), field_section.Offset());
+    stats_.alignment_bytes += field_section.Offset() - end_objects;
+
+    // Method section is 4/8 byte aligned depending on target. Just check for 4-byte alignment.
+    uint32_t end_fields = field_section.Offset() + field_section.Size();
+    CHECK_ALIGNED(method_section.Offset(), 4);
+    stats_.alignment_bytes += method_section.Offset() - end_fields;
+
+    // Dex cache arrays section is aligned depending on the target. Just check for 4-byte alignment.
+    uint32_t end_methods = method_section.Offset() + method_section.Size();
+    CHECK_ALIGNED(dex_cache_arrays_section.Offset(), 4);
+    stats_.alignment_bytes += dex_cache_arrays_section.Offset() - end_methods;
+
+    // Intern table is 8-byte aligned.
+    uint32_t end_caches = dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size();
+    CHECK_EQ(RoundUp(end_caches, 8U), intern_section.Offset());
+    stats_.alignment_bytes += intern_section.Offset() - end_caches;
+
+    // Add space between intern table and class table.
+    uint32_t end_intern = intern_section.Offset() + intern_section.Size();
+    stats_.alignment_bytes += class_table_section.Offset() - end_intern;
+
+    // Add space between class table and bitmap. Expect the bitmap to be page-aligned.
+    uint32_t end_ctable = class_table_section.Offset() + class_table_section.Size();
+    CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
+    stats_.alignment_bytes += bitmap_section.Offset() - end_ctable;
+
     stats_.bitmap_bytes += bitmap_section.Size();
     stats_.art_field_bytes += field_section.Size();
     stats_.art_method_bytes += method_section.Size();
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 8cd8d73..5bb7cf3 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,6 +114,9 @@
 // timeout on how long we wait for finalizers to run. b/21544853
 static constexpr uint64_t kNativeAllocationFinalizeTimeout = MsToNs(250u);
 
+// For deterministic compilation, we need the heap to be at a well-known address.
+static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
+
 Heap::Heap(size_t initial_size,
            size_t growth_limit,
            size_t min_free,
@@ -352,6 +355,11 @@
   }
   std::unique_ptr<MemMap> main_mem_map_1;
   std::unique_ptr<MemMap> main_mem_map_2;
+
+  // Gross hack to make dex2oat deterministic.
+  if (requested_alloc_space_begin == nullptr && Runtime::Current()->IsAotCompiler()) {
+    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(kAllocSpaceBeginForDeterministicAoT);
+  }
   uint8_t* request_begin = requested_alloc_space_begin;
   if (request_begin != nullptr && separate_non_moving_space) {
     request_begin += non_moving_space_capacity;