// tensorflow/core/profiler/protobuf/op_stats.proto - platform/external/tensorflow - Git at Google

 syntax = "proto3";

 package tensorflow.profiler;

 import "tensorflow/core/profiler/protobuf/kernel_stats.proto";
 import "tensorflow/core/profiler/protobuf/op_metrics.proto";
 import "tensorflow/core/profiler/protobuf/steps_db.proto";
 import "tensorflow/core/profiler/protobuf/tf_function.proto";

 // Performance environment, e.g., the peak performance capabilities of the
 // device.
 message PerfEnv {
   // Peak performance of a TPU core or a GPU in TFLOP/s.
   double peak_tera_flops_per_second = 1;
   // Peak memory bandwidth of a TPU core or a GPU in GiB/s.
   // NOTE(review): the field name says "giga_bytes" while the unit here is
   // written as GiB; confirm whether the value is decimal GB/s or binary GiB/s.
   double peak_hbm_bw_giga_bytes_per_second = 2;
   // The ridge point of the roofline model in FLOP/Byte (i.e., the minimum
   // operational intensity required to achieve maximum performance).
   double ridge_point = 3;
 }

 // Result proto for job information that is the same on every host: build
 // provenance and the profiling duration.
 message HostIndependentJobInfoResult {
   // The change-list number of this build.
   int64 change_list = 1;
   // The time of this build, in nanoseconds since the Unix epoch.
   int64 build_time = 2;
   // The target of this build.
   string build_target = 3;
   // Profiling duration, in milliseconds.
   uint32 profile_duration_ms = 4;
 }

 // Result proto for job information that is specific to a single host.
 message HostDependentJobInfoResult {
   // The ID of the host on which the job was run.
   string host_id = 1;
   // The command line used to run the job.
   string command_line = 2;
   // The start time of this run, in nanoseconds since the Unix epoch.
   int64 start_time = 3;
   // BNS address specified by the client at the time of the profiling request.
   string bns_address = 4;
   // Wall time at which profiling started, in nanoseconds.
   uint64 profile_time_ns = 5;
 }

 // System topology, which describes the number of chips in a pod
 // and the connectivity style.
 message SystemTopology {
   // Size of this topology along the X dimension. 0 means that dimension does
   // not exist.
   int64 x_dimension = 1;
   // Size of this topology along the Y dimension. 0 means that dimension does
   // not exist.
   int64 y_dimension = 2;
   // Size of this topology along the Z dimension. 0 means that dimension does
   // not exist.
   int64 z_dimension = 3;
   // The number of expected bad chips in this system.
   // NOTE(review): the field name says "reduced" while this comment says
   // "bad"; confirm the two terms mean the same thing.
   int64 num_expected_reduced_chips = 4;
 }

 // The run environment of a profiling session: host/task/device counts,
 // per-job information, parallelism settings and chip topology.
 message RunEnvironment {
   // Number of hosts used.
   int32 host_count = 1;
   // Number of tasks used.
   int32 task_count = 2;
   // Distinct hostnames seen, keyed by hostname.
   // NOTE(review): the meaning of the bool value is not documented here; the
   // map looks like it is used as a set -- confirm against the producer.
   map<string, bool> hostnames = 3;
   // The type of device used.
   string device_type = 4;
   // The number of device cores used.
   //   In the TPU case, this corresponds to the number of TPU cores.
   //   In the GPU case, this corresponds to the number of GPUs (not the number
   //   of SMs).
   int32 device_core_count = 5;
   // The per-device-core batch size.
   int32 per_core_batch_size = 6;
   // Host-independent information about this job.
   HostIndependentJobInfoResult host_independent_job_info = 7;
   // Host-dependent information about this job.
   repeated HostDependentJobInfoResult host_dependent_job_info = 8;
   // The number of replicas; corresponds to input parallelism.
   // If there is no model parallelism, replica_count = device_core_count.
   int32 replica_count = 9;
   // The number of cores used for a single replica, e.g. model parallelism.
   // If there is no model parallelism, then num_cores_per_replica = 1.
   int32 num_cores_per_replica = 10;
   // The chip interconnection topology.
   SystemTopology topology = 11;
   // Host trace level.
   uint32 host_trace_level = 12;
 }

 // Operator statistics: the top-level result that bundles the op metrics,
 // step data, run environment, kernel stats, tf-function stats and errors.
 message OpStats {
   // The database for the op metrics collected from the host over the entire
   // profiling session including incomplete steps.
   OpMetricsDb host_op_metrics_db = 1;
   // The database for the op metrics collected from the device over the entire
   // profiling session including incomplete steps.
   OpMetricsDb device_op_metrics_db = 2;
   // Performance environment of the op metrics collected.
   PerfEnv perf_env = 3;
   // The database of step sequences.
   StepDatabaseResult step_db = 4;
   // The run environment of this profiling session.
   RunEnvironment run_environment = 5;
   // Kernel stats results from all GPUs.
   KernelStatsDb kernel_stats_db = 6;
   // Statistics for all tf-functions.
   // Declared before field 7 (out of numeric order). Field numbers identify
   // fields on the wire, so do not renumber to tidy this up.
   TfFunctionDb tf_function_db = 8;
   // Errors seen.
   repeated string errors = 7;
 }
	syntax = "proto3";

	package tensorflow.profiler;

	import "tensorflow/core/profiler/protobuf/kernel_stats.proto";
	import "tensorflow/core/profiler/protobuf/op_metrics.proto";
	import "tensorflow/core/profiler/protobuf/steps_db.proto";
	import "tensorflow/core/profiler/protobuf/tf_function.proto";

	// Performance environment, e.g., the peak performance capabilities of the
	// device.
	message PerfEnv {
	// Peak performance of a TPU core or a GPU in TFLOP/s.
	double peak_tera_flops_per_second = 1;
	// Peak memory bandwidth of a TPU core or a GPU in GiB/s.
	// NOTE(review): the field name says "giga_bytes" while the unit here is
	// written as GiB; confirm whether the value is decimal GB/s or binary GiB/s.
	double peak_hbm_bw_giga_bytes_per_second = 2;
	// The ridge point of the roofline model in FLOP/Byte (i.e., the minimum
	// operational intensity required to achieve maximum performance).
	double ridge_point = 3;
	}

	// Result proto for job information that is the same on every host: build
	// provenance and the profiling duration.
	message HostIndependentJobInfoResult {
	// The change-list number of this build.
	int64 change_list = 1;
	// The time of this build, in nanoseconds since the Unix epoch.
	int64 build_time = 2;
	// The target of this build.
	string build_target = 3;
	// Profiling duration, in milliseconds.
	uint32 profile_duration_ms = 4;
	}

	// Result proto for job information that is specific to a single host.
	message HostDependentJobInfoResult {
	// The ID of the host on which the job was run.
	string host_id = 1;
	// The command line used to run the job.
	string command_line = 2;
	// The start time of this run, in nanoseconds since the Unix epoch.
	int64 start_time = 3;
	// BNS address specified by the client at the time of the profiling request.
	string bns_address = 4;
	// Wall time at which profiling started, in nanoseconds.
	uint64 profile_time_ns = 5;
	}

	// System topology, which describes the number of chips in a pod
	// and the connectivity style.
	message SystemTopology {
	// Size of this topology along the X dimension. 0 means that dimension does
	// not exist.
	int64 x_dimension = 1;
	// Size of this topology along the Y dimension. 0 means that dimension does
	// not exist.
	int64 y_dimension = 2;
	// Size of this topology along the Z dimension. 0 means that dimension does
	// not exist.
	int64 z_dimension = 3;
	// The number of expected bad chips in this system.
	// NOTE(review): the field name says "reduced" while this comment says
	// "bad"; confirm the two terms mean the same thing.
	int64 num_expected_reduced_chips = 4;
	}

	// The run environment of a profiling session: host/task/device counts,
	// per-job information, parallelism settings and chip topology.
	message RunEnvironment {
	// Number of hosts used.
	int32 host_count = 1;
	// Number of tasks used.
	int32 task_count = 2;
	// Distinct hostnames seen, keyed by hostname.
	// NOTE(review): the meaning of the bool value is not documented here; the
	// map looks like it is used as a set -- confirm against the producer.
	map<string, bool> hostnames = 3;
	// The type of device used.
	string device_type = 4;
	// The number of device cores used.
	// In the TPU case, this corresponds to the number of TPU cores.
	// In the GPU case, this corresponds to the number of GPUs (not the number
	// of SMs).
	int32 device_core_count = 5;
	// The per-device-core batch size.
	int32 per_core_batch_size = 6;
	// Host-independent information about this job.
	HostIndependentJobInfoResult host_independent_job_info = 7;
	// Host-dependent information about this job.
	repeated HostDependentJobInfoResult host_dependent_job_info = 8;
	// The number of replicas; corresponds to input parallelism.
	// If there is no model parallelism, replica_count = device_core_count.
	int32 replica_count = 9;
	// The number of cores used for a single replica, e.g. model parallelism.
	// If there is no model parallelism, then num_cores_per_replica = 1.
	int32 num_cores_per_replica = 10;
	// The chip interconnection topology.
	SystemTopology topology = 11;
	// Host trace level.
	uint32 host_trace_level = 12;
	}

	// Operator statistics: the top-level result that bundles the op metrics,
	// step data, run environment, kernel stats, tf-function stats and errors.
	message OpStats {
	// The database for the op metrics collected from the host over the entire
	// profiling session including incomplete steps.
	OpMetricsDb host_op_metrics_db = 1;
	// The database for the op metrics collected from the device over the entire
	// profiling session including incomplete steps.
	OpMetricsDb device_op_metrics_db = 2;
	// Performance environment of the op metrics collected.
	PerfEnv perf_env = 3;
	// The database of step sequences.
	StepDatabaseResult step_db = 4;
	// The run environment of this profiling session.
	RunEnvironment run_environment = 5;
	// Kernel stats results from all GPUs.
	KernelStatsDb kernel_stats_db = 6;
	// Statistics for all tf-functions.
	// Declared before field 7 (out of numeric order). Field numbers identify
	// fields on the wire, so do not renumber to tidy this up.
	TfFunctionDb tf_function_db = 8;
	// Errors seen.
	repeated string errors = 7;
	}