blob: 007b68e9482c0299b725a6a013ff4328e3a17761 [file] [log] [blame]
syntax = "proto3";
package tensorflow;
import "tensorflow/core/profiler/profiler_service_monitor_result.proto";
// ProfilerService exposes performance information about the programs running
// on connected devices over a period of time.
service ProfilerService {
  // Runs one profiling session: the call blocks until the session completes
  // and then returns the collected data.
  rpc Profile(ProfileRequest) returns (ProfileResponse);

  // Gathers profiling data and reports it as user-friendly metrics.
  rpc Monitor(MonitorRequest) returns (MonitorResponse);
}
// Options that control how a TF session is profiled.
message ProfileOptions {
  // Version of these options. Several options have defaults that differ from
  // the proto3 default value; the version is used to decide whether an
  // option's own default should be applied instead of the proto3 default.
  uint32 version = 5;

  // Dataset ops are left out by default to keep the trace viewer scalable.
  // Set this field manually to include them.
  bool include_dataset_ops = 1;

  // Host tracing level (version >= 1):
  // - Level 0 disables host traces.
  // - Level 1 traces only user instrumented (or default) TraceMe.
  // - Level 2 traces every level 1 TraceMe plus instrumented high level
  //   program execution details (expensive TF ops, XLA ops, etc). This is
  //   the default.
  // - Level 3 traces every level 2 TraceMe plus more verbose (low-level)
  //   program execution details (cheap TF ops, etc).
  uint32 host_tracer_level = 2;

  // Device tracing level (version >= 1):
  // - Level 0 disables device traces.
  // - Level 1 enables device traces.
  // - Specific devices may define further levels to control how verbose the
  //   trace is.
  uint32 device_tracer_level = 3;

  // Python function call tracing (version >= 1). Off by default; turning it
  // on incurs runtime overhead.
  // NOTE(review): declared as an integer level although described as on/off;
  // presumably 0 means off -- confirm against the consumer.
  uint32 python_tracer_level = 4;

  // next-field: 6
}
// Per-tool requirements attached to a profile request.
message ToolRequestOptions {
  // Output format required from the tool; one of "json", "proto", "raw",
  // etc. When unset (backward compatible) the default format is used, i.e.
  // json for most tools.
  string output_formats = 2;

  // If true, the result is saved directly to the repository; otherwise it is
  // passed back to the caller. Defaults to false for backward compatibility.
  bool save_to_repo = 3;
}
// Request message for ProfilerService.Profile.
message ProfileRequest {
  // Amount of data to collect, in milliseconds. In future the caller will be
  // able to customize when profiling starts and stops; for now
  // `duration_ms` milliseconds worth of data is collected.
  uint64 duration_ms = 1;

  // Upper bound on the number of events returned. The default value 0 means
  // all events are returned.
  uint64 max_events = 2;

  // Names of the required profiling tools, e.g. "input_pipeline_analyzer".
  repeated string tools = 3;

  // Requirements for each tool (presumably keyed by tool name; verify
  // against callers).
  map<string, ToolRequestOptions> tool_options = 8;

  // Optional profiling options controlling how a TF session is profiled.
  ProfileOptions opts = 4;

  // Location where profile data is dumped. Normally
  // MODEL_DIR/plugin/profile/ serves as the repository root.
  string repository_root = 5;

  // Profile session identifier supplied by the user.
  string session_id = 6;

  // Hostname of the system where profiling should take place. It is also
  // used as an identifier in part of the output filename.
  string host_name = 7;

  // In future, the caller will indicate which TF session is being profiled,
  // and only data relating to that program will be returned. For now, all
  // activity during the profiling period is assumed to be relevant.

  // next-field: 9
}
// Output of a single profiling tool.
message ProfileToolData {
  // File name this data is associated with (e.g. "input_pipeline.json",
  // "cluster_xxx.memory_viewer.json").
  string name = 1;

  // Payload for the specific tool (likely json).
  bytes data = 2;
}
// Response message for ProfilerService.Profile.
message ProfileResponse {
  // Field numbers 1 through 5 are reserved and must not be reused.
  reserved 1 to 5;

  // One data payload per required tool.
  repeated ProfileToolData tool_data = 6;

  // Tells whether the captured trace is empty; needed when profiling data is
  // written directly to the repository directory.
  bool empty_trace = 7;

  // next-field: 8
}
// Request message for ProfilerService.Monitor.
message MonitorRequest {
  // How long to profile between each update, in milliseconds.
  uint64 duration_ms = 1;

  // Monitoring level. Two levels are currently supported:
  // Level 1: ultra lightweight mode capturing only some utilization metrics.
  // Level 2: more verbose than level 1; collects utilization metrics, device
  //   information, step time information, etc. Avoid this level when the
  //   TPU host is under very heavy use.
  int32 monitoring_level = 2;

  // Set to true to display a timestamp in the monitoring result.
  bool timestamp = 3;

  // next-field: 4
}
// Response message for ProfilerService.Monitor.
message MonitorResponse {
  // Field numbers 2 through 9 are reserved and must not be reused.
  reserved 2 to 9;

  // Properly formatted string data that can be returned to the user as is.
  string data = 1;

  // Monitoring results for each field shown in `data`.
  ProfilerServiceMonitorResult monitor_result = 10;

  // next-field: 11
}