blob: 2e8dc4e301a3978e50630c6b8f5aee9cb054afcc [file] [log] [blame]
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_
#define INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_
// This header contains the key class (DataSource) that a producer app should
// override in order to create a custom data source that gets tracing Start/Stop
// notifications and emits tracing data.
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <array>
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>
#include <utility>
#include "perfetto/base/compiler.h"
#include "perfetto/base/export.h"
#include "perfetto/protozero/message.h"
#include "perfetto/protozero/message_handle.h"
#include "perfetto/tracing/buffer_exhausted_policy.h"
#include "perfetto/tracing/core/forward_decls.h"
#include "perfetto/tracing/internal/basic_types.h"
#include "perfetto/tracing/internal/data_source_internal.h"
#include "perfetto/tracing/internal/tracing_muxer.h"
#include "perfetto/tracing/locked_handle.h"
#include "perfetto/tracing/trace_writer_base.h"
#include "protos/perfetto/trace/trace_packet.pbzero.h"
namespace perfetto {
// Forward declarations of internal types that are only named, not used by
// value, in this header.
namespace internal {
class TracingMuxerImpl;
class TrackEventCategoryRegistry;
// Declared here so that DataSource::TraceContext can befriend it.
template <typename, const internal::TrackEventCategoryRegistry*>
class TrackEventDataSource;
} // namespace internal
// Base class with the virtual methods to get start/stop notifications.
// Embedders are supposed to derive the templated version below, not this one.
//
// Every notification receives a small argument object. Each argument type
// carries the |internal_instance_index| of the instance being notified.
class PERFETTO_EXPORT DataSourceBase {
public:
virtual ~DataSourceBase();
// TODO(primiano): change the const& args below to be pointers instead. Passing
// them by const& makes it more awkward to handle output arguments and
// requires mutable(s).
// This requires synchronizing a breaking API change for existing embedders.
//
// OnSetup() is invoked when tracing is configured. In most cases this happens
// just before starting the trace. In the case of deferred start (see
// deferred_start in trace_config.proto) start might happen later.
class SetupArgs {
public:
// This is valid only within the scope of the OnSetup() call and must not
// be retained.
const DataSourceConfig* config = nullptr;
// The index of this data source instance (0..kMaxDataSourceInstances - 1).
uint32_t internal_instance_index = 0;
};
virtual void OnSetup(const SetupArgs&);
// Arguments of the start notification, OnStart().
class StartArgs {
public:
// The index of this data source instance (0..kMaxDataSourceInstances - 1).
uint32_t internal_instance_index = 0;
};
virtual void OnStart(const StartArgs&);
// Arguments of the stop notification, OnStop(). This is an abstract
// interface: HandleStopAsynchronously() is pure virtual.
class StopArgs {
public:
virtual ~StopArgs();
// HandleStopAsynchronously() can optionally be called to defer the tracing
// session stop and write tracing data just before stopping.
// This function returns a closure that must be invoked after the last
// trace events have been emitted. The returned closure can be called from
// any thread. The caller also needs to explicitly call TraceContext.Flush()
// from the last Trace() lambda invocation because no other implicit flushes
// will happen after the stop signal.
// When this function is called, the tracing service will defer the stop of
// the tracing session until the returned closure is invoked.
// However, the caller cannot hang onto this closure for too long. The
// tracing service will forcefully stop the tracing session without waiting
// for pending producers after TraceConfig.data_source_stop_timeout_ms
// (default: 5s, can be overridden by Consumers when starting a trace).
// If the closure is called after this timeout an error will be logged and
// the trace data emitted will not be present in the trace. No other
// functional side effects (e.g. crashes or corruptions) will happen. In
// other words, it is fine to accidentally hold onto this closure for too
// long but, if that happens, some tracing data will be lost.
virtual std::function<void()> HandleStopAsynchronously() const = 0;
// The index of this data source instance (0..kMaxDataSourceInstances - 1).
uint32_t internal_instance_index = 0;
};
// See StopArgs::HandleStopAsynchronously() for how to defer the stop.
virtual void OnStop(const StopArgs&);
};
struct DefaultDataSourceTraits {
  // |IncrementalStateType| can optionally be used to store custom
  // per-sequence incremental data (e.g., interning tables). It should have a
  // Clear() method for when incremental state needs to be cleared. See
  // TraceContext::GetIncrementalState().
  using IncrementalStateType = void;

  // Customization point for the kind of thread-local state the data source
  // uses. The default gives every data source type its own thread-local
  // state, so each instance has separate trace writers and incremental state
  // even on the same thread. Some data sources (most notably the track event
  // data source) instead want to share trace writers and incremental state on
  // the same thread.
  //
  // Returns the slot of |root_tls| selected by |static_state->index|.
  static internal::DataSourceThreadLocalState* GetDataSourceTLS(
      internal::DataSourceStaticState* static_state,
      internal::TracingTLS* root_tls) {
    auto& type_slot = root_tls->data_sources_tls[static_state->index];
    // A slot is either still zero-initialized or was already initialized for
    // this very data source type.
    assert(!type_slot.static_state ||
           type_slot.static_state->index == static_state->index);
    return &type_slot;
  }
};
// Templated base class meant to be derived by embedders to create a custom data
// source. DataSourceType must be the type of the derived class itself, e.g.:
// class MyDataSource : public DataSource<MyDataSource> {...}.
//
// |DataSourceTraits| allows customizing the behavior of the data source. See
// |DefaultDataSourceTraits|.
template <typename DataSourceType,
typename DataSourceTraits = DefaultDataSourceTraits>
class DataSource : public DataSourceBase {
struct DefaultTracePointTraits;
public:
// The BufferExhaustedPolicy to use for TraceWriters of this DataSource.
// Override this in your DataSource class to change the default, which is to
// drop data on shared memory overruns.
// TraceWithInstances() reads it as DataSourceType::kBufferExhaustedPolicy when
// it creates a trace writer, so a constant with the same name declared in the
// derived class takes precedence over this one.
constexpr static BufferExhaustedPolicy kBufferExhaustedPolicy =
BufferExhaustedPolicy::kDrop;
// Argument passed to the lambda function passed to Trace() (below).
// A TraceContext is bound to one data source instance on the calling thread:
// it wraps that instance's thread-local state (trace writer and incremental
// state) together with the instance index. It is move-only.
class TraceContext {
 public:
  using TracePacketHandle =
      ::protozero::MessageHandle<::perfetto::protos::pbzero::TracePacket>;

  TraceContext(TraceContext&&) noexcept = default;
  ~TraceContext() = default;

  // Starts a new TracePacket on the trace writer of this context.
  TracePacketHandle NewTracePacket() {
    return tls_inst_->trace_writer->NewTracePacket();
  }

  // Forces a commit of the thread-local tracing data written so far to the
  // service. This is almost never required (tracing data is periodically
  // committed as trace pages are filled up) and has a non-negligible
  // performance hit (requires an IPC + refresh of the current thread-local
  // chunk). The only case when this should be used is when handling OnStop()
  // asynchronously, to ensure that the data is committed before the
  // Stop timeout expires.
  // The TracePacketHandle obtained by the last NewTracePacket() call must be
  // finalized before calling Flush() (either implicitly by going out of scope
  // or by explicitly calling Finalize()).
  // |cb| is an optional callback. When non-null it will request the
  // service to ACK the flush and will be invoked on an internal thread after
  // the service has acknowledged it. The callback might be NEVER INVOKED if
  // the service crashes or the IPC connection is dropped.
  void Flush(std::function<void()> cb = {}) {
    // |cb| is a by-value sink parameter: hand it over instead of copying the
    // std::function a second time.
    tls_inst_->trace_writer->Flush(std::move(cb));
  }

  // Returns the number of bytes written on the current thread by the current
  // data-source since its creation.
  // This can be useful for splitting protos that might grow very large.
  uint64_t written() { return tls_inst_->trace_writer->written(); }

  // Returns a RAII handle to access the data source instance, guaranteeing
  // that it won't be deleted on another thread (because of trace stopping)
  // while accessing it from within the Trace() lambda.
  // The returned handle can be invalid (nullptr) if tracing is stopped
  // immediately before calling this. The caller is supposed to check for its
  // validity before using it. After checking, the handle is guaranteed to
  // remain valid until the handle goes out of scope.
  LockedHandle<DataSourceType> GetDataSourceLocked() {
    auto* internal_state = static_state_.TryGet(instance_index_);
    if (!internal_state)
      return LockedHandle<DataSourceType>();
    return LockedHandle<DataSourceType>(
        &internal_state->lock,
        static_cast<DataSourceType*>(internal_state->data_source.get()));
  }

  // Returns the incremental state stored in the thread-local state of this
  // instance, typed as DataSourceTraits::IncrementalStateType.
  typename DataSourceTraits::IncrementalStateType* GetIncrementalState() {
    return reinterpret_cast<typename DataSourceTraits::IncrementalStateType*>(
        tls_inst_->incremental_state.get());
  }

 private:
  friend class DataSource;
  template <typename, const internal::TrackEventCategoryRegistry*>
  friend class internal::TrackEventDataSource;

  // Only DataSource and TrackEventDataSource (friends) can create contexts.
  TraceContext(internal::DataSourceInstanceThreadLocalState* tls_inst,
               uint32_t instance_index)
      : tls_inst_(tls_inst), instance_index_(instance_index) {}
  TraceContext(const TraceContext&) = delete;
  TraceContext& operator=(const TraceContext&) = delete;

  internal::DataSourceInstanceThreadLocalState* const tls_inst_;
  uint32_t const instance_index_;
};
// Entry point for emitting trace data. Pass a callable with the signature
// void(TraceContext).
// The callable runs synchronously (i.e., always before Trace() returns) and
// only if tracing is enabled and the data source has been enabled in the
// tracing config.
// It can run more than once per Trace() call, in the case of concurrent
// tracing sessions (or even if the data source is instantiated twice within
// the same trace config).
template <typename Lambda>
static void Trace(Lambda tracing_fn) {
  // Invoked by CallIfEnabled() only when some instance is active; hands the
  // instance bitmap and the caller's callable to TraceWithInstances().
  auto run_trace_point = [&tracing_fn](uint32_t active_instances) {
    TraceWithInstances<DefaultTracePointTraits>(active_instances,
                                                std::move(tracing_fn));
  };
  CallIfEnabled<DefaultTracePointTraits>(std::move(run_trace_point));
}
// Cheap trace point guard that checks whether this data source is active.
// |callback| is invoked only if there are active instances. It receives the
// instance state value, which should be handed to TraceWithInstances() to
// actually record trace data.
template <typename Traits = DefaultTracePointTraits, typename Callback>
static void CallIfEnabled(Callback callback) PERFETTO_ALWAYS_INLINE {
  // The value loaded here is a per-class bitmap that tells:
  // 1. If the data source is enabled at all.
  // 2. The index of the slot within |static_state_| that holds the instance
  //    state. In turn this allows to map the data source to the tracing
  //    session and buffers.
  // memory_order_relaxed is okay because:
  // - The bitmap is re-read with an acquire load in TraceWithInstances()
  //   before a trace writer is created.
  // - The code between this point and the acquire-load is based on static
  //   storage which has indefinite lifetime.
  uint32_t active_bitmap =
      Traits::GetActiveInstances()->load(std::memory_order_relaxed);

  // Tracing fast-path: nothing to do when tracing is not enabled (or is
  // enabled, but not for this data source).
  if (PERFETTO_LIKELY(active_bitmap == 0))
    return;

  callback(active_bitmap);
}
// The "lower half" of a trace point which actually performs tracing after
// this data source has been determined to be active.
// |instances| must be the instance state value retrieved through
// CallIfEnabled().
// |tracing_fn| will be called to record trace data as in Trace(): once for
// every active instance, with a TraceContext bound to that instance. An
// instance that turns out to be unavailable is skipped without affecting the
// remaining ones.
//
// TODO(primiano): all the stuff below should be outlined from the trace
// point. Or at least we should have some compile-time traits like
// kOptimizeBinarySize / kOptimizeTracingLatency.
template <typename Traits = DefaultTracePointTraits, typename Lambda>
static void TraceWithInstances(uint32_t instances, Lambda tracing_fn) {
  PERFETTO_DCHECK(instances);
  constexpr auto kMaxDataSourceInstances = internal::kMaxDataSourceInstances;

  // See tracing_muxer.h for the structure of the TLS.
  auto* tracing_impl = internal::TracingMuxer::Get();
  if (PERFETTO_UNLIKELY(!tls_state_))
    tls_state_ = GetOrCreateDataSourceTLS(&static_state_);

  // TracingTLS::generation is a global monotonic counter that is incremented
  // every time a tracing session is stopped. We use that as a signal to force
  // a slow-path garbage collection of all the trace writers for the current
  // thread and to destroy the ones that belong to tracing sessions that have
  // ended. This is to avoid having too many TraceWriter instances alive, each
  // holding onto one chunk of the shared memory buffer.
  // Rationale why memory_order_relaxed should be fine:
  // - The TraceWriter object that we use is always constructed and destructed
  //   on the current thread. There is no risk of accessing a half-initialized
  //   TraceWriter (which would be really bad).
  // - In the worst case, in the case of a race on the generation check, we
  //   might end up using a TraceWriter for the same data source that belongs
  //   to a stopped session. This is not really wrong, as we don't give any
  //   guarantee on the global atomicity of the stop. In the worst case the
  //   service will reject the data commit if this arrives too late.
  if (PERFETTO_UNLIKELY(
          tls_state_->root_tls->generation !=
          tracing_impl->generation(std::memory_order_relaxed))) {
    // Will update root_tls->generation.
    tracing_impl->DestroyStoppedTraceWritersForCurrentThread();
  }

  for (uint32_t i = 0; i < kMaxDataSourceInstances; i++) {
    internal::DataSourceState* instance_state =
        static_state_.TryGetCached(instances, i);
    if (!instance_state)
      continue;

    // Even if we passed the check above, the DataSourceInstance might be
    // still destroyed concurrently while this code runs. The code below is
    // designed to deal with such race, as follows:
    // - We don't access the user-defined data source instance state. The only
    //   bits of state we use are |backend_id| and |buffer_id|.
    // - Beyond those two integers, we access only the TraceWriter here. The
    //   TraceWriter is always safe because it lives on the TLS.
    // - |instance_state| is backed by static storage, so the pointer is
    //   always valid, even after the data source instance is destroyed.
    // - In the case of a race-on-destruction, we'll still see the latest
    //   backend_id and buffer_id and in the worst case keep trying writing
    //   into the tracing shared memory buffer after stopped. But this isn't
    //   really any worse than the case of the stop IPC being delayed by the
    //   kernel scheduler. The tracing service is robust against data commit
    //   attempts made after tracing is stopped.
    // There is a theoretical race that would cause the wrong behavior w.r.t
    // writing data in the wrong buffer, but it's so rare that we ignore it:
    // if the data source is stopped and started kMaxDataSourceInstances
    // times (so that the same id is recycled) while we are in this function,
    // we might end up reusing the old data source's backend_id and buffer_id
    // for the new one, because we don't see the generation change past this
    // point. But stopping and starting tracing (even once) takes so much
    // handshaking to make this extremely unrealistic.
    auto& tls_inst = tls_state_->per_instance[i];
    if (PERFETTO_UNLIKELY(!tls_inst.trace_writer)) {
      // Here we need an acquire barrier, which matches the release-store made
      // by TracingMuxerImpl::SetupDataSource(), to ensure that the backend_id
      // and buffer_id are consistent.
      instances =
          Traits::GetActiveInstances()->load(std::memory_order_acquire);
      instance_state = static_state_.TryGetCached(instances, i);
      // Skip only this instance. Returning here would also drop the event
      // for every other active instance that comes later in the bitmap.
      if (!instance_state || !instance_state->trace_lambda_enabled)
        continue;
      tls_inst.backend_id = instance_state->backend_id;
      tls_inst.buffer_id = instance_state->buffer_id;
      tls_inst.trace_writer = tracing_impl->CreateTraceWriter(
          instance_state, DataSourceType::kBufferExhaustedPolicy);
      // The null pointer argument only selects the overload that matches
      // DataSourceTraits::IncrementalStateType.
      CreateIncrementalState(
          &tls_inst,
          static_cast<typename DataSourceTraits::IncrementalStateType*>(
              nullptr));
      // Even in the case of out-of-IDs, SharedMemoryArbiterImpl returns a
      // NullTraceWriter. The returned pointer should never be null.
      assert(tls_inst.trace_writer);
    }

    tracing_fn(TraceContext(&tls_inst, i));
  }
}
// Registers the data source on all tracing backends, including ones that
// connect after the registration. Doing so enables the data source to receive
// Setup/Start/Stop notifications and makes the Trace() method work when
// tracing is enabled and the data source is selected.
// This must be called after Tracing::Initialize().
// Can return false to signal failure if attempting to register more than
// kMaxDataSources (32) data source types.
static bool Register(const DataSourceDescriptor& descriptor) {
  // Referencing the static members silences -Wunused-variable in case the
  // trace method is not used by the translation unit that declares the data
  // source.
  (void)static_state_;
  (void)tls_state_;

  // Handed to the muxer so it can create instances of the concrete type.
  auto make_data_source = []() -> std::unique_ptr<DataSourceBase> {
    return std::unique_ptr<DataSourceBase>(new DataSourceType());
  };
  return internal::TracingMuxer::Get()->RegisterDataSource(
      descriptor, make_data_source, &static_state_);
}
private:
// Traits for customizing the behavior of a specific trace point.
// This is the default |Traits| argument of CallIfEnabled() and
// TraceWithInstances().
struct DefaultTracePointTraits {
// By default, every call to DataSource::Trace() will record trace events
// for every active instance of that data source. A single trace point can,
// however, use a custom set of enable flags for more fine grained control
// of when that trace point is active.
//
// DANGER: when doing this, the data source must use the appropriate memory
// fences when changing the state of the bitmap.
//
// Returns a pointer to the atomic bitmap of active instances, which here is
// the |valid_instances| member of this data source type's static state.
static constexpr std::atomic<uint32_t>* GetActiveInstances() {
return &static_state_.valid_instances;
}
};
// Creates the user provided incremental state inside the given thread-local
// storage. The second parameter is never read: its static type alone selects
// the overload, so that the case without an incremental state type (void)
// ends up in the no-op overload below.
template <typename T>
static void CreateIncrementalState(
    internal::DataSourceInstanceThreadLocalState* tls_inst,
    const T*) {
  using StatePointer =
      internal::DataSourceInstanceThreadLocalState::IncrementalStatePointer;
  PERFETTO_DCHECK(!tls_inst->incremental_state);
  // The state is stored type-erased; the deleter restores the real type.
  void* erased_state = new T();
  tls_inst->incremental_state = StatePointer(
      erased_state, [](void* p) { delete static_cast<T*>(p); });
}

// Overload chosen when IncrementalStateType is void: nothing to create.
static void CreateIncrementalState(
    internal::DataSourceInstanceThreadLocalState*,
    const void*) {}
// Returns the thread-local state of this data source type for the calling
// thread. Note that there is one such object per-thread per-data-source-type,
// NOT per data-source *instance*.
static internal::DataSourceThreadLocalState* GetOrCreateDataSourceTLS(
    internal::DataSourceStaticState* static_state) {
  internal::TracingTLS* thread_root =
      internal::TracingMuxer::Get()->GetOrCreateTracingTLS();
  internal::DataSourceThreadLocalState* type_tls =
      DataSourceTraits::GetDataSourceTLS(static_state, thread_root);

  // The fields are re-initialized on every call: the initialization is
  // idempotent and not worth the code for extra checks.
  type_tls->static_state = static_state;
  assert(!type_tls->root_tls || type_tls->root_tls == thread_root);
  type_tls->root_tls = thread_root;
  return type_tls;
}
// Static state. Accessed by the static Trace() method fastpaths.
// Its |valid_instances| bitmap is what
// DefaultTracePointTraits::GetActiveInstances() exposes.
static internal::DataSourceStaticState static_state_;
// This TLS object is a cached raw pointer and has deliberately no destructor.
// The Platform implementation is supposed to create and manage the lifetime
// of the Platform::ThreadLocalObject and take care of destroying it.
// This is because non-POD thread_local variables have subtleties (global
// destructors) that we need to defer to the embedder. In chromium's platform
// implementation, for instance, the tls slot is implemented using
// chromium's base::ThreadLocalStorage.
// The pointer is filled in lazily by TraceWithInstances(), through
// GetOrCreateDataSourceTLS(), the first time it finds it null on a thread.
static thread_local internal::DataSourceThreadLocalState* tls_state_;
};
// Out-of-class definitions of the static data members declared in DataSource.
// Each DataSource<T, D> specialization gets its own |static_state_| and its
// own thread-local |tls_state_| pointer.
template <typename T, typename D>
internal::DataSourceStaticState DataSource<T, D>::static_state_;
template <typename T, typename D>
thread_local internal::DataSourceThreadLocalState* DataSource<T, D>::tls_state_;
} // namespace perfetto
// If placed at the end of a macro declaration, eats the semicolon at the end of
// the macro invocation (e.g., "MACRO(...);") to avoid warnings about extra
// semicolons.
// It expands to a declaration that lacks its terminating ';', which the
// semicolon written after the macro invocation then completes.
#define PERFETTO_INTERNAL_SWALLOW_SEMICOLON() \
extern int perfetto_internal_unused
// Not needed -- only here for backwards compatibility.
// Accepts and ignores any arguments; expands only to
// PERFETTO_INTERNAL_SWALLOW_SEMICOLON().
// TODO(skyostil): Remove this macro.
#define PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(...) \
PERFETTO_INTERNAL_SWALLOW_SEMICOLON()
// Not needed -- only here for backwards compatibility.
// Accepts and ignores any arguments; expands only to
// PERFETTO_INTERNAL_SWALLOW_SEMICOLON().
// TODO(skyostil): Remove this macro.
#define PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(...) \
PERFETTO_INTERNAL_SWALLOW_SEMICOLON()
#endif // INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_