Merge remote-tracking branch 'aosp/upstream'

* aosp/upstream:
  Fix 32-bit builds; only build 64-bit in Android trees
  Collect stats about how long it takes to process included makefiles
  When printing top stats, include count
  Stop using TLS for stats start time

Test: treehugger
Change-Id: I1a464d7d265e73d66bdf93069cc3008a19beb11d
diff --git a/Android.bp b/Android.bp
index ef7c990..149d170 100644
--- a/Android.bp
+++ b/Android.bp
@@ -20,6 +20,7 @@
         "-Werror",
         "-DNOLOG",
     ],
+    compile_multilib: "64",
     tidy_checks: [
         "-google-global-names-in-headers",
         "-google-build-using-namespace",
diff --git a/Makefile.ckati b/Makefile.ckati
index 5bc7c9d..0984c4e 100644
--- a/Makefile.ckati
+++ b/Makefile.ckati
@@ -82,17 +82,17 @@
 # Rule to build ckati into KATI_BIN_PATH
 $(KATI_BIN_PATH)/ckati: $(KATI_CXX_OBJS) $(KATI_CXX_GENERATED_OBJS)
 	@mkdir -p $(dir $@)
-	$(KATI_LD) -std=c++11 $(KATI_CXXFLAGS) -o $@ $^ $(KATI_LIBS)
+	$(KATI_LD) -std=c++17 $(KATI_CXXFLAGS) -o $@ $^ $(KATI_LIBS)
 
 # Rule to build normal source files into object files in KATI_INTERMEDIATES_PATH
 $(KATI_CXX_OBJS) $(KATI_CXX_TEST_OBJS): $(KATI_INTERMEDIATES_PATH)/%.o: $(KATI_SRC_PATH)/%.cc
 	@mkdir -p $(dir $@)
-	$(KATI_CXX) -c -std=c++11 $(KATI_CXXFLAGS) -o $@ $<
+	$(KATI_CXX) -c -std=c++17 $(KATI_CXXFLAGS) -o $@ $<
 
 # Rule to build generated source files into object files in KATI_INTERMEDIATES_PATH
 $(KATI_CXX_GENERATED_OBJS): $(KATI_INTERMEDIATES_PATH)/%.o: $(KATI_INTERMEDIATES_PATH)/%.cc
 	@mkdir -p $(dir $@)
-	$(KATI_CXX) -c -std=c++11 $(KATI_CXXFLAGS) -o $@ $<
+	$(KATI_CXX) -c -std=c++17 $(KATI_CXXFLAGS) -o $@ $<
 
 ckati_tests: $(KATI_CXX_TEST_EXES)
 
diff --git a/eval.cc b/eval.cc
index f1ad42f..8de8f41 100644
--- a/eval.cc
+++ b/eval.cc
@@ -26,6 +26,7 @@
 #include "fileutil.h"
 #include "parser.h"
 #include "rule.h"
+#include "stats.h"
 #include "stmt.h"
 #include "strutil.h"
 #include "symtab.h"
@@ -405,6 +406,7 @@
 
 void Evaluator::DoInclude(const string& fname) {
   CheckStack();
+  COLLECT_STATS_WITH_SLOW_REPORT("included makefiles", fname.c_str());
 
   Makefile* mk = MakefileCacheManager::Get()->ReadMakefile(fname);
   if (!mk->Exists()) {
@@ -418,6 +420,11 @@
     LOG("%s", stmt->DebugString().c_str());
     stmt->Eval(this);
   }
+
+  for (auto& mk : profiled_files_) {
+    stats.MarkInteresting(mk);
+  }
+  profiled_files_.clear();
 }
 
 void Evaluator::EvalInclude(const IncludeStmt* stmt) {
diff --git a/eval.h b/eval.h
index 147d5f0..0859cb1 100644
--- a/eval.h
+++ b/eval.h
@@ -107,6 +107,10 @@
     export_error_ = true;
   }
 
+  void ProfileMakefile(StringPiece mk) {
+    profiled_files_.emplace_back(mk.as_string());
+  }
+
  private:
   Var* EvalRHS(Symbol lhs,
                Value* rhs,
@@ -159,6 +163,8 @@
   unique_ptr<string> export_message_;
   bool export_error_;
 
+  vector<string> profiled_files_;
+
   static SymbolSet used_undefined_vars_;
 };
 
diff --git a/func.cc b/func.cc
index 9b0560e..2713fbe 100644
--- a/func.cc
+++ b/func.cc
@@ -926,6 +926,15 @@
   ev->SetExportObsolete(msg);
 }
 
+void ProfileFunc(const vector<Value*>& args, Evaluator* ev, string*) {
+  for (auto arg : args) {
+    string files = arg->Eval(ev);
+    for (StringPiece file : WordScanner(files)) {
+      ev->ProfileMakefile(file);
+    }
+  }
+}
+
 FuncInfo g_func_infos[] = {
     {"patsubst", &PatsubstFunc, 3, 3, false, false},
     {"strip", &StripFunc, 1, 1, false, false},
@@ -975,6 +984,8 @@
     {"KATI_obsolete_var", &ObsoleteVarFunc, 2, 1, false, false},
     {"KATI_deprecate_export", &DeprecateExportFunc, 1, 1, false, false},
     {"KATI_obsolete_export", &ObsoleteExportFunc, 1, 1, false, false},
+
+    {"KATI_profile_makefile", &ProfileFunc, 0, 0, false, false},
 };
 
 unordered_map<StringPiece, FuncInfo*>* g_func_info_map;
diff --git a/stats.cc b/stats.cc
index 27d6c50..5e7b27d 100644
--- a/stats.cc
+++ b/stats.cc
@@ -23,14 +23,12 @@
 #include "flags.h"
 #include "log.h"
 #include "stringprintf.h"
-#include "thread_local.h"
 #include "timeutil.h"
 
 namespace {
 
 mutex g_mu;
 vector<Stats*>* g_stats;
-DEFINE_THREAD_LOCAL(double, g_start_time);
 
 }  // namespace
 
@@ -44,53 +42,90 @@
 void Stats::DumpTop() const {
   unique_lock<mutex> lock(mu_);
   if (detailed_.size() > 0) {
-    vector<pair<string, double>> v(detailed_.begin(), detailed_.end());
-    sort(
-        v.begin(), v.end(),
-        [](const pair<string, double> a, const pair<string, double> b) -> bool {
-          return a.second > b.second;
-        });
-    for (unsigned int i = 0; i < 10 && i < v.size(); i++) {
-      LOG_STAT(" %5.3f %s", v[i].first.c_str(), v[i].second);
+    vector<pair<string, StatsDetails>> details(detailed_.begin(),
+                                               detailed_.end());
+    sort(details.begin(), details.end(),
+         [](const pair<string, StatsDetails> a,
+            const pair<string, StatsDetails> b) -> bool {
+           return a.second.elapsed_ > b.second.elapsed_;
+         });
+
+    // Only print the top 10
+    details.resize(min(details.size(), static_cast<size_t>(10)));
+
+    if (!interesting_.empty()) {
+      // No need to print anything out twice
+      auto interesting = interesting_;
+      for (auto& [n, detail] : details) {
+        interesting.erase(n);
+      }
+
+      for (auto& name : interesting) {
+        auto detail = detailed_.find(name);
+        if (detail == detailed_.end()) {
+          details.emplace_back(name, StatsDetails());
+          continue;
+        }
+        details.emplace_back(*detail);
+      }
+    }
+
+    int max_cnt_len = 1;
+    for (auto& [name, detail] : details) {
+      max_cnt_len =
+          max(max_cnt_len, static_cast<int>(to_string(detail.cnt_).length()));
+    }
+
+    for (auto& [name, detail] : details) {
+      LOG_STAT(" %6.3f / %*d %s", detail.elapsed_, max_cnt_len, detail.cnt_,
+               name.c_str());
     }
   }
 }
 
 string Stats::String() const {
   unique_lock<mutex> lock(mu_);
+  if (!detailed_.empty())
+    return StringPrintf("%s: %f / %d (%d unique)", name_, elapsed_, cnt_,
+                        detailed_.size());
   return StringPrintf("%s: %f / %d", name_, elapsed_, cnt_);
 }
 
-void Stats::Start() {
-  CHECK(!TLS_REF(g_start_time));
-  TLS_REF(g_start_time) = GetTime();
+double Stats::Start() {
+  double start = GetTime();
   unique_lock<mutex> lock(mu_);
   cnt_++;
+  return start;
 }
 
-double Stats::End(const char* msg) {
-  CHECK(TLS_REF(g_start_time));
-  double e = GetTime() - TLS_REF(g_start_time);
-  TLS_REF(g_start_time) = 0;
+double Stats::End(double start, const char* msg) {
+  double e = GetTime() - start;
   unique_lock<mutex> lock(mu_);
   elapsed_ += e;
   if (msg != 0) {
-    detailed_[string(msg)] += e;
+    StatsDetails& details = detailed_[string(msg)];
+    details.elapsed_ += e;
+    details.cnt_++;
   }
   return e;
 }
 
+void Stats::MarkInteresting(const string& msg) {
+  unique_lock<mutex> lock(mu_);
+  interesting_.emplace(msg);
+}
+
 ScopedStatsRecorder::ScopedStatsRecorder(Stats* st, const char* msg)
-    : st_(st), msg_(msg) {
+    : st_(st), msg_(msg), start_time_(0) {
   if (!g_flags.enable_stat_logs)
     return;
-  st_->Start();
+  start_time_ = st_->Start();
 }
 
 ScopedStatsRecorder::~ScopedStatsRecorder() {
   if (!g_flags.enable_stat_logs)
     return;
-  double e = st_->End(msg_);
+  double e = st_->End(start_time_, msg_);
   if (msg_ && e > 3.0) {
     LOG_STAT("slow %s (%f): %s", st_->name_, e, msg_);
   }
diff --git a/stats.h b/stats.h
index 25f6e5b..4501353 100644
--- a/stats.h
+++ b/stats.h
@@ -18,9 +18,15 @@
 #include <mutex>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 
 using namespace std;
 
+struct StatsDetails {
+  int cnt_ = 0;
+  double elapsed_ = 0;
+};
+
 class Stats {
  public:
   explicit Stats(const char* name);
@@ -28,9 +34,11 @@
   void DumpTop() const;
   string String() const;
 
+  void MarkInteresting(const string& msg);
+
  private:
-  void Start();
-  double End(const char* msg);
+  double Start();
+  double End(double start, const char* msg);
 
   friend class ScopedStatsRecorder;
 
@@ -38,7 +46,8 @@
   double elapsed_;
   int cnt_;
   mutable mutex mu_;
-  unordered_map<string, double> detailed_;
+  unordered_map<string, StatsDetails> detailed_;
+  unordered_set<string> interesting_;
 };
 
 class ScopedStatsRecorder {
@@ -49,6 +58,7 @@
  private:
   Stats* st_;
   const char* msg_;
+  double start_time_;
 };
 
 void ReportAllStats();
diff --git a/thread_local.h b/thread_local.h
deleted file mode 100644
index d9d5191..0000000
--- a/thread_local.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// A simple cross platform thread local storage implementation.
-//
-// This is a drop-in replacement of __thread keyword. If your compiler
-// toolchain supports __thread keyword, the user of this code should
-// be as fast as the code which uses __thread. Chrome's
-// base::ThreadLocalPointer and base::ThreadLocalStorage cannot be as
-// fast as __thread.
-// TODO(crbug.com/249345): If pthread_getspecific is slow for our use,
-// expose bionic's internal TLS and stop using pthread_getspecific
-// based implementation.
-//
-// Usage:
-//
-// Before (linux):
-//
-// __thread Foo* foo;
-// foo = new Foo();
-// foo->func();
-//
-//
-// After:
-//
-// DEFINE_THREAD_LOCAL(Foo*, foo);
-// foo.Ref() = new Foo();
-// foo.Ref()->func();
-//
-// Thread local PODs are zero-initialized.
-// Thread local non-PODs are initialized with the default constructor.
-
-#ifndef THREAD_LOCAL_H_
-#define THREAD_LOCAL_H_
-
-#include <errno.h>
-#include <pthread.h>
-
-#include "log.h"
-
-#ifdef __linux__
-
-#define DEFINE_THREAD_LOCAL(Type, name) thread_local Type name
-#define TLS_REF(x) x
-
-#else
-
-// Thread local storage implementation which uses pthread.
-// Note that DEFINE_THREAD_LOCAL creates a global variable just like
-// thread local storage based on __thread keyword. So we should not use
-// constructor in ThreadLocal class to avoid static initializator.
-template <typename Type>
-void ThreadLocalDestructor(void* ptr) {
-  delete reinterpret_cast<Type>(ptr);
-}
-
-template <typename Type, pthread_key_t* key>
-void ThreadLocalInit() {
-  if (pthread_key_create(key, ThreadLocalDestructor<Type>))
-    ERROR("Failed to create a pthread key for TLS errno=%d", errno);
-}
-
-template <typename Type, pthread_key_t* key, pthread_once_t* once>
-class ThreadLocal {
- public:
-  Type& Ref() { return *GetPointer(); }
-  Type Get() { return Ref(); }
-  void Set(const Type& value) { Ref() = value; }
-  Type* GetPointer() {
-    pthread_once(once, ThreadLocalInit<Type*, key>);
-    Type* value = reinterpret_cast<Type*>(pthread_getspecific(*key));
-    if (value)
-      return value;
-    // new Type() for PODs means zero initialization.
-    value = new Type();
-    int error = pthread_setspecific(*key, value);
-    if (error != 0)
-      ERROR("Failed to set a TLS: error=%d", error);
-    return value;
-  }
-};
-
-// We need a namespace for name##_key and name##_once since template parameters
-// do not accept unnamed values such as static global variables.
-#define DEFINE_THREAD_LOCAL(Type, name)           \
-  namespace {                                     \
-  pthread_once_t name##_once = PTHREAD_ONCE_INIT; \
-  pthread_key_t name##_key;                       \
-  }                                               \
-  ThreadLocal<Type, &name##_key, &name##_once> name;
-
-#define TLS_REF(x) x.Ref()
-
-#endif
-
-#endif  // THREAD_LOCAL_H_