Merge "Enable multi-threaded Quick compilation" into dalvik-dev
diff --git a/src/compiler.cc b/src/compiler.cc
index 7bec994..b0d373b 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -322,6 +322,8 @@
   }
   VLOG(compiler) << "dlopen(\"" << compiler_so_name << "\", RTLD_LAZY) returned " << compiler_library_;
 
+  CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key");
+
 #if defined(ART_USE_LLVM_COMPILER) || defined(ART_USE_GREENLAND_COMPILER)
   // Initialize compiler_context_
   typedef void (*InitCompilerContextFn)(Compiler&);
@@ -431,6 +433,16 @@
   }
 }
 
+CompilerTls* Compiler::GetTls() {
+  // Lazily create thread-local storage
+  CompilerTls* res = static_cast<CompilerTls*>(pthread_getspecific(tls_key_));
+  if (res == NULL) {
+    res = new CompilerTls();
+    CHECK_PTHREAD_CALL(pthread_setspecific, (tls_key_, res), "compiler tls");
+  }
+  return res;
+}
+
 ByteArray* Compiler::CreateResolutionStub(InstructionSet instruction_set,
                                           Runtime::TrampolineType type) {
   switch (instruction_set) {
diff --git a/src/compiler.h b/src/compiler.h
index c5f19f7..5e9dbd7 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -40,6 +40,22 @@
 class OatCompilationUnit;
 class TimingLogger;
 
+// Thread-local storage compiler worker threads
+class CompilerTls {
+#if defined(ART_USE_QUICK_COMPILER)
+  public:
+    CompilerTls() : llvm_info_(NULL) {}
+    ~CompilerTls() {}
+
+    void* GetLLVMInfo() { return llvm_info_; }
+
+    void SetLLVMInfo(void* llvm_info) { llvm_info_ = llvm_info; }
+
+  private:
+    void* llvm_info_;
+#endif
+};
+
 class Compiler {
  public:
   // Create a compiler targeting the requested "instruction_set".
@@ -72,6 +88,8 @@
     return image_;
   }
 
+  CompilerTls* GetTls();
+
   // Stub to throw AbstractMethodError
   static ByteArray* CreateAbstractMethodErrorStub(InstructionSet instruction_set)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -347,6 +365,8 @@
                                                     const char* shorty, uint32_t shorty_len);
   CreateInvokeStubFn create_invoke_stub_;
 
+  pthread_key_t tls_key_;
+
 #if defined(ART_USE_LLVM_COMPILER)
   typedef CompiledInvokeStub* (*CreateProxyStubFn)
       (Compiler& compiler, const char* shorty, uint32_t shorty_len);
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 8bda3fe..7eb32c2 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -178,10 +178,10 @@
 };
 
 #if defined(ART_USE_QUICK_COMPILER)
-class QuickCompiler {
+class LLVMInfo {
   public:
-    QuickCompiler();
-    ~QuickCompiler();
+    LLVMInfo();
+    ~LLVMInfo();
 
     llvm::LLVMContext* GetLLVMContext() {
       return llvm_context_.get();
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 593fce5..5a10831 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -549,7 +549,7 @@
   Checkstats* checkstats;
 #if defined(ART_USE_QUICK_COMPILER)
   bool genBitcode;
-  QuickCompiler* quick_compiler;
+  LLVMInfo* llvm_info;
   llvm::LLVMContext* context;
   llvm::Module* module;
   llvm::Function* func;
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index d1259b7..72eb8a1 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -21,10 +21,24 @@
 #include "object.h"
 #include "runtime.h"
 
+#if defined(ART_USE_QUICK_COMPILER)
+#include <llvm/Support/Threading.h>
+
+namespace {
+  pthread_once_t llvm_multi_init = PTHREAD_ONCE_INIT;
+  void InitializeLLVMForQuick() {
+    llvm::llvm_start_multithreaded();
+  }
+}
+#endif
+
 namespace art {
 
 #if defined(ART_USE_QUICK_COMPILER)
-QuickCompiler::QuickCompiler() {
+LLVMInfo::LLVMInfo() {
+#if !defined(ART_USE_LLVM_COMPILER)
+  pthread_once(&llvm_multi_init, InitializeLLVMForQuick);
+#endif
   // Create context, module, intrinsic helper & ir builder
   llvm_context_.reset(new llvm::LLVMContext());
   llvm_module_ = new llvm::Module("art", *llvm_context_);
@@ -33,17 +47,17 @@
   ir_builder_.reset(new greenland::IRBuilder(*llvm_context_, *llvm_module_, *intrinsic_helper_));
 }
 
-QuickCompiler::~QuickCompiler() {
+LLVMInfo::~LLVMInfo() {
 }
 
 extern "C" void ArtInitQuickCompilerContext(art::Compiler& compiler) {
   CHECK(compiler.GetCompilerContext() == NULL);
-  QuickCompiler* quickCompiler = new QuickCompiler();
-  compiler.SetCompilerContext(quickCompiler);
+  LLVMInfo* llvmInfo = new LLVMInfo();
+  compiler.SetCompilerContext(llvmInfo);
 }
 
 extern "C" void ArtUnInitQuickCompilerContext(art::Compiler& compiler) {
-  delete reinterpret_cast<QuickCompiler*>(compiler.GetCompilerContext());
+  delete reinterpret_cast<LLVMInfo*>(compiler.GetCompilerContext());
   compiler.SetCompilerContext(NULL);
 }
 #endif
@@ -777,7 +791,7 @@
                               uint32_t method_idx, jobject class_loader,
                               const DexFile& dex_file
 #if defined(ART_USE_QUICK_COMPILER)
-                              , QuickCompiler* quick_compiler,
+                              , LLVMInfo* llvm_info,
                               bool gbcOnly
 #endif
                              )
@@ -812,14 +826,7 @@
   DCHECK((cUnit->instructionSet == kThumb2) ||
          (cUnit->instructionSet == kX86) ||
          (cUnit->instructionSet == kMips));
-  if (gbcOnly) {
-    cUnit->quick_compiler = quick_compiler;
-  } else {
-    // TODO: We need one LLVMContext per thread.
-    cUnit->quick_compiler =
-        reinterpret_cast<QuickCompiler*>(compiler.GetCompilerContext());
-  }
-  DCHECK(cUnit->quick_compiler != NULL);
+  cUnit->llvm_info = llvm_info;
   if (cUnit->instructionSet == kThumb2) {
     // TODO: remove this once x86 is tested
     cUnit->genBitcode = true;
@@ -1257,10 +1264,10 @@
                            uint32_t access_flags, InvokeType invoke_type,
                            uint32_t method_idx, jobject class_loader,
                            const DexFile& dex_file,
-                           QuickCompiler* quick_compiler)
+                           LLVMInfo* llvm_info)
 {
   compileMethod(compiler, code_item, access_flags, invoke_type, method_idx, class_loader,
-                dex_file, quick_compiler, true);
+                dex_file, llvm_info, true);
 }
 #else
 CompiledMethod* oatCompileMethod(Compiler& compiler,
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index 58678a0..cf07ea4 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -170,11 +170,20 @@
 }
 void initIR(CompilationUnit* cUnit)
 {
-  QuickCompiler* quick = cUnit->quick_compiler;
-  cUnit->context = quick->GetLLVMContext();
-  cUnit->module = quick->GetLLVMModule();
-  cUnit->intrinsic_helper = quick->GetIntrinsicHelper();
-  cUnit->irb = quick->GetIRBuilder();
+  LLVMInfo* llvmInfo = cUnit->llvm_info;
+  if (llvmInfo == NULL) {
+    CompilerTls* tls = cUnit->compiler->GetTls();
+    CHECK(tls != NULL);
+    llvmInfo = static_cast<LLVMInfo*>(tls->GetLLVMInfo());
+    if (llvmInfo == NULL) {
+      llvmInfo = new LLVMInfo();
+      tls->SetLLVMInfo(llvmInfo);
+    }
+  }
+  cUnit->context = llvmInfo->GetLLVMContext();
+  cUnit->module = llvmInfo->GetLLVMModule();
+  cUnit->intrinsic_helper = llvmInfo->GetIntrinsicHelper();
+  cUnit->irb = llvmInfo->GetIRBuilder();
 }
 
 const char* llvmSSAName(CompilationUnit* cUnit, int ssaReg) {
diff --git a/src/compiler_llvm/compilation_unit.cc b/src/compiler_llvm/compilation_unit.cc
index 95073ea..ba71aee 100644
--- a/src/compiler_llvm/compilation_unit.cc
+++ b/src/compiler_llvm/compilation_unit.cc
@@ -172,9 +172,9 @@
 #else
   compiler_ = NULL;
   oat_compilation_unit_ = NULL;
-  quick_ctx_.reset(new QuickCompiler());
-  context_.reset(quick_ctx_->GetLLVMContext());
-  module_ = quick_ctx_->GetLLVMModule();
+  llvm_info_.reset(new LLVMInfo());
+  context_.reset(llvm_info_->GetLLVMContext());
+  module_ = llvm_info_->GetLLVMModule();
 #endif
 
   // Include the runtime function declaration
@@ -211,7 +211,7 @@
 #if defined(ART_USE_DEXLANG_FRONTEND)
   delete dex_lang_ctx_;
 #elif defined(ART_USE_QUICK_COMPILER)
-  llvm::LLVMContext* llvm_context = context_.release(); // Managed by quick_ctx_
+  llvm::LLVMContext* llvm_context = context_.release(); // Managed by llvm_info_
   CHECK(llvm_context != NULL);
 #endif
 }
@@ -331,7 +331,7 @@
 #if defined(ART_USE_DEXLANG_FRONTEND)
     fpm.add(CreateGBCExpanderPass(dex_lang_ctx_->GetIntrinsicHelper(), *irb_.get()));
 #elif defined(ART_USE_QUICK_COMPILER)
-    fpm.add(CreateGBCExpanderPass(*quick_ctx_->GetIntrinsicHelper(), *irb_.get(),
+    fpm.add(CreateGBCExpanderPass(*llvm_info_->GetIntrinsicHelper(), *irb_.get(),
                                   compiler_, oat_compilation_unit_));
 #endif
     fpm.add(new ::AddSuspendCheckToLoopLatchPass(irb_.get()));
@@ -341,7 +341,7 @@
 #if defined(ART_USE_DEXLANG_FRONTEND)
     fpm2.add(CreateGBCExpanderPass(dex_lang_ctx_->GetIntrinsicHelper(), *irb_.get()));
 #elif defined(ART_USE_QUICK_COMPILER)
-    fpm2.add(CreateGBCExpanderPass(*quick_ctx_->GetIntrinsicHelper(), *irb_.get(),
+    fpm2.add(CreateGBCExpanderPass(*llvm_info_->GetIntrinsicHelper(), *irb_.get(),
                                    compiler_, oat_compilation_unit_));
 #endif
     fpm2.add(new ::AddSuspendCheckToLoopLatchPass(irb_.get()));
diff --git a/src/compiler_llvm/compilation_unit.h b/src/compiler_llvm/compilation_unit.h
index c4fbae4..6ad7ee1 100644
--- a/src/compiler_llvm/compilation_unit.h
+++ b/src/compiler_llvm/compilation_unit.h
@@ -91,8 +91,8 @@
   }
 
 #if defined(ART_USE_QUICK_COMPILER)
-  QuickCompiler* GetQuickContext() const {
-    return quick_ctx_.get();
+  LLVMInfo* GetQuickContext() const {
+    return llvm_info_.get();
   }
   void SetCompiler(Compiler* compiler) {
     compiler_ = compiler;
@@ -125,7 +125,7 @@
   greenland::DexLang::Context* dex_lang_ctx_;
 #endif
 #if defined(ART_USE_QUICK_COMPILER)
-  UniquePtr<QuickCompiler> quick_ctx_;
+  UniquePtr<LLVMInfo> llvm_info_;
   Compiler* compiler_;
   OatCompilationUnit* oat_compilation_unit_;
 #endif
diff --git a/src/compiler_llvm/compiler_llvm.cc b/src/compiler_llvm/compiler_llvm.cc
index 85ae794..a964b40 100644
--- a/src/compiler_llvm/compiler_llvm.cc
+++ b/src/compiler_llvm/compiler_llvm.cc
@@ -45,7 +45,7 @@
                            uint32_t access_flags, InvokeType invoke_type,
                            uint32_t method_idx, jobject class_loader,
                            const DexFile& dex_file,
-                           QuickCompiler* quick_compiler);
+                           LLVMInfo* llvm_info);
 }
 #endif
 
diff --git a/src/dex2oat.cc b/src/dex2oat.cc
index c96620e..d20d7ab 100644
--- a/src/dex2oat.cc
+++ b/src/dex2oat.cc
@@ -493,11 +493,7 @@
   uintptr_t image_base = 0;
   UniquePtr<std::string> host_prefix;
   std::vector<const char*> runtime_args;
-#if defined(ART_USE_QUICK_COMPILER) || defined(__APPLE__)
-  int thread_count = 1;
-#else
   int thread_count = sysconf(_SC_NPROCESSORS_CONF);
-#endif
   bool support_debugging = false;
 #if defined(__arm__)
   InstructionSet instruction_set = kThumb2;