| syntax = "proto3"; |
| |
| package tensorflow; |
| option cc_enable_arenas = true; |
| option java_outer_classname = "StepStatsProtos"; |
| option java_multiple_files = true; |
| option java_package = "org.tensorflow.framework"; |
| option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; |
| import "tensorflow/core/framework/allocation_description.proto"; |
| import "tensorflow/core/framework/tensor_description.proto"; |
| |
| // One allocation or de-allocation event carried out by the allocator. |
| message AllocationRecord { |
|   // When the operation happened (microseconds, going by the field name). |
|   int64 alloc_micros = 1; |
| |
|   // Size of the operation in bytes: the number of bytes allocated, or, |
|   // when the value is negative, the number of bytes de-allocated. |
|   int64 alloc_bytes = 2; |
| } |
| |
| // Memory usage figures attributed to a single named allocator. |
| message AllocatorMemoryUsed { |
|   // Name of the allocator these figures belong to. |
|   string allocator_name = 1; |
| |
|   // Per-node allocator memory stats. |
|   int64 total_bytes = 2; |
|   int64 peak_bytes = 3; |
|   // Bytes that have not been deallocated. |
|   int64 live_bytes = 4; |
|   // Timeline of the allocations and deallocations. |
|   repeated AllocationRecord allocation_records = 6; |
| |
|   // Snapshot of the overall allocator memory stats: the number of live |
|   // bytes currently allocated by the allocator. |
|   int64 allocator_bytes_in_use = 5; |
| } |
| |
| // Output sizes recorded for a single execution of a graph node. |
| // |
| // NOTE(review): field number 2 is not assigned in this message. If it |
| // belonged to a removed field it should be declared `reserved`; confirm |
| // the history before reusing it. |
| message NodeOutput { |
|   // Output slot this entry refers to. |
|   int32 slot = 1; |
| |
|   // Description of the tensor for that slot. The TensorDescription type is |
|   // presumably the one from the imported tensor_description.proto. |
|   TensorDescription tensor_description = 3; |
| } |
| |
| // Memory-tracking figures. |
| message MemoryStats { |
|   // Current (non-deprecated) fields. |
|   int64 temp_memory_size = 1; |
|   int64 persistent_memory_size = 3; |
|   repeated int64 persistent_tensor_alloc_ids = 5; |
| |
|   // Deprecated `device_`-prefixed fields, kept so their numbers and names |
|   // stay defined. |
|   // NOTE(review): no replacement is named here; presumably the unprefixed |
|   // fields above supersede them -- confirm before documenting a migration. |
|   int64 device_temp_memory_size = 2 [deprecated = true]; |
|   int64 device_persistent_memory_size = 4 [deprecated = true]; |
|   repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true]; |
| } |
| |
| // Time/size stats recorded for a single execution of a graph node. |
| message NodeExecStats { |
|   // TODO(tucker): Use some more compact form of node identity than |
|   // the full string name. Either all processes should agree on a |
|   // global id (cost_id?) for each node, or we should use a hash of |
|   // the name. |
|   string node_name = 1; |
| |
|   // Timing fields at microsecond granularity (per the `_micros` suffix). |
|   // NOTE(review): the `_rel_` fields are presumably offsets relative to |
|   // `all_start_micros` -- confirm against the code that fills them in. |
|   int64 all_start_micros = 2; |
|   int64 op_start_rel_micros = 3; |
|   int64 op_end_rel_micros = 4; |
|   int64 all_end_rel_micros = 5; |
| |
|   // Memory usage entries, one AllocatorMemoryUsed per reported allocator. |
|   repeated AllocatorMemoryUsed memory = 6; |
|   // Output entries recorded for this node execution. |
|   repeated NodeOutput output = 7; |
|   string timeline_label = 8; |
|   int64 scheduled_micros = 9; |
|   uint32 thread_id = 10; |
|   // AllocationDescription is presumably the type declared in the imported |
|   // allocation_description.proto. |
|   repeated AllocationDescription referenced_tensor = 11; |
|   MemoryStats memory_stats = 12; |
| |
|   // `_nanos`-suffixed counterparts of the `_micros` timing fields above |
|   // (all_start, op_start_rel, op_end_rel, all_end_rel, scheduled). |
|   int64 all_start_nanos = 13; |
|   int64 op_start_rel_nanos = 14; |
|   int64 op_end_rel_nanos = 15; |
|   int64 all_end_rel_nanos = 16; |
|   int64 scheduled_nanos = 17; |
| } |
| |
| // Stats gathered for one device: its per-node execution stats plus the |
| // names of the threads involved. |
| message DeviceStepStats { |
|   // The device these stats belong to. |
|   string device = 1; |
| |
|   // One NodeExecStats entry per recorded node execution. |
|   repeated NodeExecStats node_stats = 2; |
| |
|   // Thread names, keyed by thread id. |
|   map<uint32, string> thread_names = 3; |
| } |
| |
| // Collection of per-device stats, one DeviceStepStats entry per device. |
| message StepStats { |
|   repeated DeviceStepStats dev_stats = 1; |
| } |