profcollectd: add interface to wait for ETM after boot.

ETM device may not be ready when profcollectd is started.
So this CL adds interface to wait for ETM after boot:

1. In simpleperf_profcollect, split has_support() into
   has_driver_support() and has_device_support().
2. In ProfcollectdBinderService and Scheduler, add interface
   to register ProviderStatusCallback, which is called when
   the trace provider is ready.
3. In profcollectd, register ProviderStatusCallback after boot.
   Then record boot profile and schedule periodic recording in
   the callback.

Bug: 213519191
Test: on oriole EVT and DVT devices, profcollectd can record boot
Test: profile and periodic profiles.
Test: run simpleperf_unit_test.
Change-Id: I87befcd372ad85a0131116a7925f303b4990ec47
diff --git a/profcollectd/Android.bp b/profcollectd/Android.bp
index f172300..122e4f6 100644
--- a/profcollectd/Android.bp
+++ b/profcollectd/Android.bp
@@ -59,6 +59,7 @@
     name: "profcollectd_aidl",
     srcs: [
         "binder/com/android/server/profcollect/IProfCollectd.aidl",
+        "binder/com/android/server/profcollect/IProviderStatusCallback.aidl",
     ],
     path: "binder",
 }
diff --git a/profcollectd/binder/com/android/server/profcollect/IProfCollectd.aidl b/profcollectd/binder/com/android/server/profcollect/IProfCollectd.aidl
index c8f33a1..bfc2444 100644
--- a/profcollectd/binder/com/android/server/profcollect/IProfCollectd.aidl
+++ b/profcollectd/binder/com/android/server/profcollect/IProfCollectd.aidl
@@ -16,6 +16,8 @@
 
 package com.android.server.profcollect;
 
+import com.android.server.profcollect.IProviderStatusCallback;
+
 /** {@hide} */
 interface IProfCollectd {
     void schedule();
@@ -24,4 +26,5 @@
     void process();
     @utf8InCpp String report();
     @utf8InCpp String get_supported_provider();
+    void registerProviderStatusCallback(IProviderStatusCallback cb);
 }
diff --git a/profcollectd/binder/com/android/server/profcollect/IProviderStatusCallback.aidl b/profcollectd/binder/com/android/server/profcollect/IProviderStatusCallback.aidl
new file mode 100644
index 0000000..b311cf9
--- /dev/null
+++ b/profcollectd/binder/com/android/server/profcollect/IProviderStatusCallback.aidl
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.server.profcollect;
+
+/** {@hide} */
+oneway interface IProviderStatusCallback {
+    void onProviderReady();
+}
diff --git a/profcollectd/libprofcollectd/lib.rs b/profcollectd/libprofcollectd/lib.rs
index abc1127..f95132f 100644
--- a/profcollectd/libprofcollectd/lib.rs
+++ b/profcollectd/libprofcollectd/lib.rs
@@ -30,11 +30,34 @@
 use profcollectd_aidl_interface::aidl::com::android::server::profcollect::IProfCollectd::{
     self, BnProfCollectd,
 };
+use profcollectd_aidl_interface::aidl::com::android::server::profcollect::IProviderStatusCallback::{IProviderStatusCallback, BnProviderStatusCallback};
 use profcollectd_aidl_interface::binder::{self, BinderFeatures};
-use service::ProfcollectdBinderService;
+use service::{err_to_binder_status, ProfcollectdBinderService};
+use std::time::{Duration, Instant};
 
 const PROFCOLLECTD_SERVICE_NAME: &str = "profcollectd";
 
+struct ProviderStatusCallback {
+    service_start_time: Instant,
+}
+
+impl binder::Interface for ProviderStatusCallback {}
+
+impl IProviderStatusCallback for ProviderStatusCallback {
+    fn onProviderReady(&self) -> binder::Result<()> {
+        // If we have waited too long for the provider to be ready, then we have passed
+        // boot phase, and no need to collect boot profile.
+        // TODO: should we check boottime instead?
+        const TIMEOUT_TO_COLLECT_BOOT_PROFILE: Duration = Duration::from_secs(3);
+        let elapsed = Instant::now().duration_since(self.service_start_time);
+        if elapsed < TIMEOUT_TO_COLLECT_BOOT_PROFILE {
+            trace_once("boot").map_err(err_to_binder_status)?;
+        }
+        schedule().map_err(err_to_binder_status)?;
+        Ok(())
+    }
+}
+
 /// Initialise profcollectd service.
 /// * `schedule_now` - Immediately schedule collection after service is initialised.
 pub fn init_service(schedule_now: bool) -> Result<()> {
@@ -49,8 +72,11 @@
     .context("Failed to register service.")?;
 
     if schedule_now {
-        trace_once("boot")?;
-        schedule()?;
+        let cb = BnProviderStatusCallback::new_binder(
+            ProviderStatusCallback { service_start_time: Instant::now() },
+            BinderFeatures::default(),
+        );
+        get_profcollectd_service()?.registerProviderStatusCallback(&cb)?;
     }
 
     binder::ProcessState::join_thread_pool();
diff --git a/profcollectd/libprofcollectd/logging_trace_provider.rs b/profcollectd/libprofcollectd/logging_trace_provider.rs
index 1325855..fda4c66 100644
--- a/profcollectd/libprofcollectd/logging_trace_provider.rs
+++ b/profcollectd/libprofcollectd/logging_trace_provider.rs
@@ -32,6 +32,10 @@
         "logging"
     }
 
+    fn is_ready(&self) -> bool {
+        true
+    }
+
     fn trace(&self, trace_dir: &Path, tag: &str, sampling_period: &Duration) {
         let trace_file = trace_provider::get_path(trace_dir, tag, LOGGING_TRACEFILE_EXTENSION);
 
diff --git a/profcollectd/libprofcollectd/scheduler.rs b/profcollectd/libprofcollectd/scheduler.rs
index 31a495a..f58c499 100644
--- a/profcollectd/libprofcollectd/scheduler.rs
+++ b/profcollectd/libprofcollectd/scheduler.rs
@@ -17,11 +17,13 @@
 //! ProfCollect tracing scheduler.
 
 use std::fs;
+use std::mem;
 use std::path::Path;
 use std::sync::mpsc::{sync_channel, SyncSender};
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::thread;
+use std::time::{Duration, Instant};
 
 use crate::config::{Config, PROFILE_OUTPUT_DIR, TRACE_OUTPUT_DIR};
 use crate::trace_provider::{self, TraceProvider};
@@ -33,12 +35,17 @@
     termination_ch: Option<SyncSender<()>>,
     /// The preferred trace provider for the system.
     trace_provider: Arc<Mutex<dyn TraceProvider + Send>>,
+    provider_ready_callbacks: Arc<Mutex<Vec<Box<dyn FnOnce() + Send>>>>,
 }
 
 impl Scheduler {
     pub fn new() -> Result<Self> {
         let p = trace_provider::get_trace_provider()?;
-        Ok(Scheduler { termination_ch: None, trace_provider: p })
+        Ok(Scheduler {
+            termination_ch: None,
+            trace_provider: p,
+            provider_ready_callbacks: Arc::new(Mutex::new(Vec::new())),
+        })
     }
 
     fn is_scheduled(&self) -> bool {
@@ -106,6 +113,51 @@
     pub fn get_trace_provider_name(&self) -> &'static str {
         self.trace_provider.lock().unwrap().get_name()
     }
+
+    pub fn is_provider_ready(&self) -> bool {
+        self.trace_provider.lock().unwrap().is_ready()
+    }
+
+    pub fn register_provider_ready_callback(&self, cb: Box<dyn FnOnce() + Send>) {
+        let mut locked_callbacks = self.provider_ready_callbacks.lock().unwrap();
+        locked_callbacks.push(cb);
+        if locked_callbacks.len() == 1 {
+            self.start_thread_waiting_for_provider_ready();
+        }
+    }
+
+    fn start_thread_waiting_for_provider_ready(&self) {
+        let provider = self.trace_provider.clone();
+        let callbacks = self.provider_ready_callbacks.clone();
+
+        thread::spawn(move || {
+            let start_time = Instant::now();
+            loop {
+                let elapsed = Instant::now().duration_since(start_time);
+                if provider.lock().unwrap().is_ready() {
+                    break;
+                }
+                // Decide check period based on how long we have waited:
+                // For the first 10s waiting, check every 100ms (likely to work on EVT devices).
+                // For the first 10m waiting, check every 10s (likely to work on DVT devices).
+                // For others, check every 10m.
+                let sleep_duration = if elapsed < Duration::from_secs(10) {
+                    Duration::from_millis(100)
+                } else if elapsed < Duration::from_secs(60 * 10) {
+                    Duration::from_secs(10)
+                } else {
+                    Duration::from_secs(60 * 10)
+                };
+                thread::sleep(sleep_duration);
+            }
+
+            let mut locked_callbacks = callbacks.lock().unwrap();
+            let v = mem::take(&mut *locked_callbacks);
+            for cb in v {
+                cb();
+            }
+        });
+    }
 }
 
 /// Run if space usage is under limit.
diff --git a/profcollectd/libprofcollectd/service.rs b/profcollectd/libprofcollectd/service.rs
index 0199c10..3f33802 100644
--- a/profcollectd/libprofcollectd/service.rs
+++ b/profcollectd/libprofcollectd/service.rs
@@ -18,8 +18,9 @@
 
 use anyhow::{anyhow, Context, Error, Result};
 use binder::Result as BinderResult;
-use binder::Status;
+use binder::{SpIBinder, Status};
 use profcollectd_aidl_interface::aidl::com::android::server::profcollect::IProfCollectd::IProfCollectd;
+use profcollectd_aidl_interface::aidl::com::android::server::profcollect::IProviderStatusCallback::IProviderStatusCallback;
 use std::ffi::CString;
 use std::fs::{read_dir, read_to_string, remove_file, write};
 use std::str::FromStr;
@@ -32,7 +33,7 @@
 use crate::report::{get_report_ts, pack_report};
 use crate::scheduler::Scheduler;
 
-fn err_to_binder_status(msg: Error) -> Status {
+pub fn err_to_binder_status(msg: Error) -> Status {
     let msg = format!("{:#?}", msg);
     let msg = CString::new(msg).expect("Failed to convert to CString");
     Status::new_service_specific_error(1, Some(&msg))
@@ -89,6 +90,30 @@
     fn get_supported_provider(&self) -> BinderResult<String> {
         Ok(self.lock().scheduler.get_trace_provider_name().to_string())
     }
+
+    fn registerProviderStatusCallback(
+        &self,
+        cb: &binder::Strong<(dyn IProviderStatusCallback)>,
+    ) -> BinderResult<()> {
+        if self.lock().scheduler.is_provider_ready() {
+            if let Err(e) = cb.onProviderReady() {
+                log::error!("Failed to call ProviderStatusCallback {:?}", e);
+            }
+            return Ok(());
+        }
+
+        let cb_binder: SpIBinder = cb.as_binder();
+        self.lock().scheduler.register_provider_ready_callback(Box::new(move || {
+            if let Ok(cb) = cb_binder.into_interface::<dyn IProviderStatusCallback>() {
+                if let Err(e) = cb.onProviderReady() {
+                    log::error!("Failed to call ProviderStatusCallback {:?}", e)
+                }
+            } else {
+                log::error!("SpIBinder is not a IProviderStatusCallback.");
+            }
+        }));
+        Ok(())
+    }
 }
 
 impl ProfcollectdBinderService {
diff --git a/profcollectd/libprofcollectd/simpleperf_etm_trace_provider.rs b/profcollectd/libprofcollectd/simpleperf_etm_trace_provider.rs
index 14fb750..f25f5ff 100644
--- a/profcollectd/libprofcollectd/simpleperf_etm_trace_provider.rs
+++ b/profcollectd/libprofcollectd/simpleperf_etm_trace_provider.rs
@@ -34,6 +34,10 @@
         "simpleperf_etm"
     }
 
+    fn is_ready(&self) -> bool {
+        simpleperf_profcollect::has_device_support()
+    }
+
     fn trace(&self, trace_dir: &Path, tag: &str, sampling_period: &Duration) {
         let trace_file = trace_provider::get_path(trace_dir, tag, ETM_TRACEFILE_EXTENSION);
 
@@ -76,6 +80,6 @@
 
 impl SimpleperfEtmTraceProvider {
     pub fn supported() -> bool {
-        simpleperf_profcollect::has_support()
+        simpleperf_profcollect::has_driver_support()
     }
 }
diff --git a/profcollectd/libprofcollectd/trace_provider.rs b/profcollectd/libprofcollectd/trace_provider.rs
index 98cd5ec..1305919 100644
--- a/profcollectd/libprofcollectd/trace_provider.rs
+++ b/profcollectd/libprofcollectd/trace_provider.rs
@@ -29,6 +29,7 @@
 
 pub trait TraceProvider {
     fn get_name(&self) -> &'static str;
+    fn is_ready(&self) -> bool;
     fn trace(&self, trace_dir: &Path, tag: &str, sampling_period: &Duration);
     fn process(&self, trace_dir: &Path, profile_dir: &Path, binary_filter: &str) -> Result<()>;
 }
diff --git a/simpleperf/ETMRecorder.cpp b/simpleperf/ETMRecorder.cpp
index 7bef4f2..554e8f5 100644
--- a/simpleperf/ETMRecorder.cpp
+++ b/simpleperf/ETMRecorder.cpp
@@ -23,6 +23,7 @@
 #include <memory>
 #include <string>
 
+#include <android-base/expected.h>
 #include <android-base/file.h>
 #include <android-base/logging.h>
 #include <android-base/parseint.h>
@@ -34,6 +35,9 @@
 
 namespace simpleperf {
 
+using android::base::expected;
+using android::base::unexpected;
+
 static constexpr bool ETM_RECORD_TIMESTAMP = false;
 
 static const std::string ETM_DIR = "/sys/bus/event_source/devices/cs_etm/";
@@ -103,32 +107,30 @@
                                      "CoreSight ETM instruction tracing", "arm");
 }
 
-bool ETMRecorder::CheckEtmSupport() {
+bool ETMRecorder::IsETMDriverAvailable() {
+  return IsDir(ETM_DIR);
+}
+
+expected<bool, std::string> ETMRecorder::CheckEtmSupport() {
   if (GetEtmEventType() == -1) {
-    LOG(ERROR) << "etm event type isn't supported on device";
-    return false;
+    return unexpected("etm event type isn't supported on device");
   }
   if (!ReadEtmInfo()) {
-    LOG(ERROR) << "etm devices are not available";
-    return false;
+    return unexpected("etm devices are not available");
   }
   for (const auto& p : etm_info_) {
     if (p.second.GetMajorVersion() < 4) {
-      LOG(ERROR) << "etm device version is less than 4.0";
-      return false;
+      return unexpected("etm device version is less than 4.0");
     }
     if (!p.second.IsContextIDSupported()) {
-      LOG(ERROR) << "etm device doesn't support contextID";
-      return false;
+      return unexpected("etm device doesn't support contextID");
     }
     if (!p.second.IsEnabled()) {
-      LOG(ERROR) << "etm device isn't enabled by the bootloader";
-      return false;
+      return unexpected("etm device isn't enabled by the bootloader");
     }
   }
   if (!FindSinkConfig()) {
-    LOG(ERROR) << "can't find etr device, which moves etm data to memory";
-    return false;
+    return unexpected("can't find etr device, which moves etm data to memory");
   }
   etm_supported_ = true;
   return true;
diff --git a/simpleperf/ETMRecorder.h b/simpleperf/ETMRecorder.h
index 1e33ed8..f304f60 100644
--- a/simpleperf/ETMRecorder.h
+++ b/simpleperf/ETMRecorder.h
@@ -21,6 +21,8 @@
 #include <map>
 #include <memory>
 
+#include <android-base/expected.h>
+
 #include "event_type.h"
 #include "perf_event.h"
 #include "record.h"
@@ -55,7 +57,8 @@
   // If not found, return -1.
   int GetEtmEventType();
   std::unique_ptr<EventType> BuildEventType();
-  bool CheckEtmSupport();
+  bool IsETMDriverAvailable();
+  android::base::expected<bool, std::string> CheckEtmSupport();
   void SetEtmPerfEventAttr(perf_event_attr* attr);
   AuxTraceInfoRecord CreateAuxTraceInfoRecord();
   size_t GetAddrFilterPairs();
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index f51393f..ef2c1b6 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -917,7 +917,7 @@
 }
 
 TEST(record_cmd, cs_etm_event) {
-  if (!ETMRecorder::GetInstance().CheckEtmSupport()) {
+  if (!ETMRecorder::GetInstance().CheckEtmSupport().ok()) {
     GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device";
     return;
   }
@@ -952,7 +952,7 @@
 
 TEST(record_cmd, cs_etm_system_wide) {
   TEST_REQUIRE_ROOT();
-  if (!ETMRecorder::GetInstance().CheckEtmSupport()) {
+  if (!ETMRecorder::GetInstance().CheckEtmSupport().ok()) {
     GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device";
     return;
   }
@@ -960,7 +960,7 @@
 }
 
 TEST(record_cmd, aux_buffer_size_option) {
-  if (!ETMRecorder::GetInstance().CheckEtmSupport()) {
+  if (!ETMRecorder::GetInstance().CheckEtmSupport().ok()) {
     GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device";
     return;
   }
@@ -973,7 +973,7 @@
 
 TEST(record_cmd, addr_filter_option) {
   TEST_REQUIRE_HW_COUNTER();
-  if (!ETMRecorder::GetInstance().CheckEtmSupport()) {
+  if (!ETMRecorder::GetInstance().CheckEtmSupport().ok()) {
     GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device";
     return;
   }
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index a6d9a7e..d5bca31 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -217,7 +217,8 @@
   selection->event_attr.precise_ip = event_type->precise_ip;
   if (IsEtmEventType(event_type->event_type.type)) {
     auto& etm_recorder = ETMRecorder::GetInstance();
-    if (!etm_recorder.CheckEtmSupport()) {
+    if (auto result = etm_recorder.CheckEtmSupport(); !result.ok()) {
+      LOG(ERROR) << result.error();
       return false;
     }
     ETMRecorder::GetInstance().SetEtmPerfEventAttr(&selection->event_attr);
diff --git a/simpleperf/include/simpleperf_profcollect.hpp b/simpleperf/include/simpleperf_profcollect.hpp
index f2c48da..67123e5 100644
--- a/simpleperf/include/simpleperf_profcollect.hpp
+++ b/simpleperf/include/simpleperf_profcollect.hpp
@@ -16,7 +16,8 @@
 
 extern "C" {
 
-bool HasSupport();
+bool HasDriverSupport();
+bool HasDeviceSupport();
 bool Record(const char* event_name, const char* output, float duration);
 bool Inject(const char* traceInput, const char* profileOutput, const char* binary_filter);
 }
diff --git a/simpleperf/profcollect.cpp b/simpleperf/profcollect.cpp
index 3f64abd..39c04ab 100644
--- a/simpleperf/profcollect.cpp
+++ b/simpleperf/profcollect.cpp
@@ -24,8 +24,14 @@
 
 using namespace simpleperf;
 
-bool HasSupport() {
-  if (!ETMRecorder::GetInstance().CheckEtmSupport()) {
+bool HasDriverSupport() {
+  return ETMRecorder::GetInstance().IsETMDriverAvailable();
+}
+
+bool HasDeviceSupport() {
+  auto result = ETMRecorder::GetInstance().CheckEtmSupport();
+  if (!result.ok()) {
+    LOG(DEBUG) << result.error();
     return false;
   }
   const EventType* type = FindEventTypeByName("cs-etm", false);
diff --git a/simpleperf/rust/lib.rs b/simpleperf/rust/lib.rs
index c76bf0e..7d39273 100644
--- a/simpleperf/rust/lib.rs
+++ b/simpleperf/rust/lib.rs
@@ -25,9 +25,16 @@
     CString::new(path.to_str().unwrap()).unwrap()
 }
 
-/// Returns whether the system has support for simpleperf etm.
-pub fn has_support() -> bool {
-    unsafe { simpleperf_profcollect_bindgen::HasSupport() }
+/// Returns whether the system has etm driver. ETM driver should be available immediately
+/// after boot.
+pub fn has_driver_support() -> bool {
+    unsafe { simpleperf_profcollect_bindgen::HasDriverSupport() }
+}
+
+/// Returns whether the system has etm device. ETM device may not be available immediately
+/// after boot.
+pub fn has_device_support() -> bool {
+    unsafe { simpleperf_profcollect_bindgen::HasDeviceSupport() }
 }
 
 /// ETM recording scope