// blob: af0008186bc128a7940ab8fb834bd2b355df058f [file] [log] [blame]
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/lib/gtl/map_util.h"
namespace tensorflow {
namespace profiler {
// Well-known name strings used by the profiler schema.
// NOTE(review): judging only by the identifiers, the first five look like
// plane names (kGpuPlanePrefix being a prefix rather than a full name) and the
// remaining ones look like line names; confirm against the declarations in
// xplane_schema.h.
const absl::string_view kHostThreads = "/host:CPU";
const absl::string_view kGpuPlanePrefix = "/device:GPU:";
const absl::string_view kCuptiDriverApiPlaneName = "/host:CUPTI";
const absl::string_view kMetadataPlane = "/host:metadata";
const absl::string_view kTFStreamzPlane = "/host:tfstreamz";
// Names ending in "LineName".
const absl::string_view kStepLineName = "Steps";
const absl::string_view kTensorFlowNameScopeLineName = "TensorFlow Name Scope";
const absl::string_view kTensorFlowOpLineName = "TensorFlow Ops";
const absl::string_view kXlaModuleLineName = "XLA Modules";
const absl::string_view kXlaOpLineName = "XLA Ops";
const absl::string_view kKernelLaunchLineName = "Launch Stats";
// Integer ids associated with the planes.
// NOTE(review): how these ids are consumed (e.g. for ordering) is not visible
// in this file; see the users of xplane_schema.h.
const int32 kHostPlaneId = 49;
const int32 kGpuPlaneBaseId = 0;
const int32 kCuptiDriverApiPlaneId = 50;
const int32 kMetadataPlaneId = 99;
const int32 kTFStreamzPlaneId = 98;
// The thread-group id range is derived from its neighbours: it starts right
// after kCuptiDriverApiPlaneId (i.e. 51) and ends right before
// kTFStreamzPlaneId (i.e. 97).
const int32 kThreadGroupMinPlaneId = kCuptiDriverApiPlaneId + 1;
const int32 kThreadGroupMaxPlaneId = kTFStreamzPlaneId - 1;
namespace {
// Lookup-table types: name -> enumerator, and the reverse direction.
using HostEventTypeMap = absl::flat_hash_map<absl::string_view, HostEventType>;
using HostEventTypeStrMap = absl::flat_hash_map<HostEventType, absl::string_view>;
using StatTypeMap = absl::flat_hash_map<absl::string_view, StatType>;
using StatTypeStrMap = absl::flat_hash_map<StatType, absl::string_view>;

// Number of enumerators in the inclusive range [kFirst..., kLast...] of each
// enum. The name tables below are checked against these counts.
constexpr int kNumHostEventTypes =
    (HostEventType::kLastHostEventType - HostEventType::kFirstHostEventType) +
    1;
constexpr int kNumStatTypes =
    (StatType::kLastStatType - StatType::kFirstStatType) + 1;
// Returns the table that maps a host event name to its HostEventType.
//
// The table is built once, on the first call, and lives on the heap; it is
// never deleted here, so no destructor runs for it at program exit.
const HostEventTypeMap& GetHostEventTypeMap() {
  static const HostEventTypeMap& name_to_type = *new HostEventTypeMap({
      {"UnknownHostEventType", kUnknownHostEventType},
      {"TraceContext", kTraceContext},
      {"SessionRun", kSessionRun},
      {"FunctionRun", kFunctionRun},
      {"RunGraph", kRunGraph},
      {"RunGraphDone", kRunGraphDone},
      {"TfOpRun", kTfOpRun},
      {"EagerKernelExecute", kEagerKernelExecute},
      {"ExecutorState::Process", kExecutorStateProcess},
      {"ExecutorDoneCallback", kExecutorDoneCallback},
      {"MemoryAllocation", kMemoryAllocation},
      {"MemoryDeallocation", kMemoryDeallocation},
      // Performance counters.
      {"RemotePerfCounter", kRemotePerf},
      // Captured-function events from tf.data.
      {"InstantiatedCapturedFunction::Run", kTfDataCapturedFunctionRun},
      {"InstantiatedCapturedFunction::RunWithBorrowedArgs",
       kTfDataCapturedFunctionRunWithBorrowedArgs},
      {"InstantiatedCapturedFunction::RunInstantiated",
       kTfDataCapturedFunctionRunInstantiated},
      {"InstantiatedCapturedFunction::RunAsync",
       kTfDataCapturedFunctionRunAsync},
      // Functional ops.
      {"CallOp", kCallOp},
      {"ParallelForOp", kParallelForOp},
      {"ForeverOp", kForeverOp},
      {"NumericalGradientOp-EvalRight", kNumericalGradientOpEvalRight},
      {"NumericalGradientOp-EvalLeft", kNumericalGradientOpEvalLeft},
      {"SymbolicGradientOp", kSymbolicGradientOp},
      {"RemoteCallOp", kRemoteCallOp},
      {"IfOp", kIfOp},
      {"CaseOp", kCaseOp},
      {"WhileOp-EvalCond", kWhileOpEvalCond},
      {"WhileOp-StartBody", kWhileOpStartBody},
      {"ForOp", kForOp},
      {"PartitionedCallOp", kPartitionedCallOp},
      // XLA.
      {"LocalExecutable::ExecuteOnLocalDevices",
       kLocalExecutableExecuteOnLocalDevice},
      {"LocalExecutable::Execute", kLocalExecutableExecute},
      // tf.data.
      {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
      // Virtual events used for grouping.
      {"HostTrainingLoopIteration", kHostTrainingLoopIteration},
      {"AsyncExecutorTraceContext", kAsyncExecutorTraceContext},
      // GPU.
      {"KernelLaunch", kKernelLaunch},
      {"KernelExecute", kKernelExecute},
  });
  // Every enumerator in the HostEventType range must have exactly one name;
  // a mismatch means an entry was added to the enum but not to this table
  // (or vice versa).
  DCHECK_EQ(name_to_type.size(), kNumHostEventTypes);
  return name_to_type;
}
// Returns the table that maps a stat name to its StatType.
//
// The table is built once, on the first call, and lives on the heap; it is
// never deleted here, so no destructor runs for it at program exit.
const StatTypeMap& GetStatTypeMap() {
  static const StatTypeMap& name_to_type = *new StatTypeMap({
      {"UnknownStatType", kUnknownStatType},
      // Arguments attached to TraceMe events.
      {"id", kStepId},
      {"parent_step_id", kParentStepId},
      {"function_step_id", kFunctionStepId},
      {"device_ordinal", kDeviceOrdinal},
      {"chip_ordinal", kChipOrdinal},
      {"node_ordinal", kNodeOrdinal},
      {"model_id", kModelId},
      {"queue_addr", kQueueAddr},
      {"request_id", kRequestId},
      {"run_id", kRunId},
      {"graph_type", kGraphType},
      {"step_num", kStepNum},
      {"iter_num", kIterNum},
      {"index_on_host", kIndexOnHost},
      {"allocator_name", kAllocatorName},
      {"bytes_reserved", kBytesReserved},
      {"bytes_allocated", kBytesAllocated},
      {"bytes_available", kBytesAvailable},
      {"fragmentation", kFragmentation},
      {"peak_bytes_in_use", kPeakBytesInUse},
      {"requested_bytes", kRequestedBytes},
      {"allocation_bytes", kAllocationBytes},
      {"addr", kAddress},
      {"region_type", kRegionType},
      {"data_type", kDataType},
      {"shape", kTensorShapes},
      // Arguments attached to device traces.
      {"device_id", kDeviceId},
      {"context_id", kContextId},
      {"correlation_id", kCorrelationId},
      {"memcpy_details", kMemcpyDetails},
      {"memalloc_details", kMemallocDetails},
      {"kernel_details", kKernelDetails},
      {"annotation", kKernelAnnotation},
      {"stream", kStream},
      // Stats that are added while traces are processed.
      {"group_id", kGroupId},
      {"step_name", kStepName},
      {"level 0", kLevel0},
      {"tf_op", kTfOp},
      {"hlo_op", kHloOp},
      {"hlo_module", kHloModule},
      {"equation", kEquation},
      {"is_eager", kIsEager},
      {"tf_function_call", kTfFunctionCall},
      {"tracing_count", kTfFunctionTracingCount},
      // Performance counters.
      {"Raw Value", kRawValue},
      {"Scaled Value", kScaledValue},
      {"Thread Id", kThreadId},
      // XLA metadata map.
      {"SELF_DURATION_PS", kSelfDurationPs},
      {"MIN_DURATION_PS", kMinDurationPs},
      {"Hlo Proto", kHloProto},
      // Device capabilities.
      {"clock_rate", kDevCapClockRateKHz},
      {"core_count", kDevCapCoreCount},
      {"memory_bandwidth", kDevCapMemoryBandwidth},
      {"memory_size", kDevCapMemorySize},
      {"compute_cap_major", kDevCapComputeCapMajor},
      {"compute_cap_minor", kDevCapComputeCapMinor},
  });
  // Every enumerator in the StatType range must have exactly one name; a
  // mismatch means an entry was added to the enum but not to this table (or
  // vice versa).
  DCHECK_EQ(name_to_type.size(), kNumStatTypes);
  return name_to_type;
}
// Returns the HostEventType -> name table, obtained by reversing the
// name -> HostEventType table. Built once on first use and never deleted here.
const HostEventTypeStrMap& GetHostEventTypeStrMap() {
  static const HostEventTypeStrMap& type_to_name = *new HostEventTypeStrMap(
      gtl::ReverseMap<HostEventTypeStrMap>(GetHostEventTypeMap()));
  return type_to_name;
}
// Returns the StatType -> name table, obtained by reversing the
// name -> StatType table. Built once on first use and never deleted here.
const StatTypeStrMap& GetStatTypeStrMap() {
  static const StatTypeStrMap& type_to_name = *new StatTypeStrMap(
      gtl::ReverseMap<StatTypeStrMap>(GetStatTypeMap()));
  return type_to_name;
}
} // namespace
// Returns the name registered for `event_type`.
// The lookup uses flat_hash_map::at(), so `event_type` must be present in the
// table.
absl::string_view GetHostEventTypeStr(HostEventType event_type) {
  const HostEventTypeStrMap& names = GetHostEventTypeStrMap();
  return names.at(event_type);
}
// Looks up `event_name` in the host event name table.
// Returns the matching HostEventType value as an int64, or absl::nullopt when
// the name is not registered.
absl::optional<int64> FindHostEventType(absl::string_view event_name) {
  const auto* found = gtl::FindOrNull(GetHostEventTypeMap(), event_name);
  if (found == nullptr) {
    return absl::nullopt;
  }
  return *found;
}
// Returns the name registered for `stat_type`.
// The lookup uses flat_hash_map::at(), so `stat_type` must be present in the
// table.
absl::string_view GetStatTypeStr(StatType stat_type) {
  const StatTypeStrMap& names = GetStatTypeStrMap();
  return names.at(stat_type);
}
// Looks up `stat_name` in the stat name table.
// Returns the matching StatType value as an int64, or absl::nullopt when the
// name is not registered.
absl::optional<int64> FindStatType(absl::string_view stat_name) {
  const auto* found = gtl::FindOrNull(GetStatTypeMap(), stat_name);
  if (found == nullptr) {
    return absl::nullopt;
  }
  return *found;
}
} // namespace profiler
} // namespace tensorflow