Add hooks to the Stream Executor API to get/set the AMDGPU gcnArchName device property
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 50647b8..1fc99c5 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -1770,15 +1770,11 @@
<< strings::HumanReadableNumBytes(description->memory_bandwidth())
<< "/s";
#elif TENSORFLOW_USE_ROCM
- int isa_version;
- if (!description->rocm_amdgpu_isa_version(&isa_version)) {
- // Logs internally on failure.
- isa_version = 0;
- }
+ std::string gcn_arch_name = description->rocm_amdgpu_gcn_arch_name();
LOG(INFO) << "Found device " << i << " with properties: "
<< "\npciBusID: " << description->pci_bus_id()
<< " name: " << description->name()
- << " ROCm AMD GPU ISA: gfx" << isa_version
+ << " ROCm AMDGPU Arch: " << gcn_arch_name
<< "\ncoreClock: " << description->clock_rate_ghz() << "GHz"
<< " coreCount: " << description->core_count()
<< " deviceMemorySize: "
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index 42db563..2b64db1 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -1388,6 +1388,13 @@
"Feature not supported on CUDA platform (GetGpuISAVersion)"};
}
+/* static */ port::Status GpuDriver::GetGpuGCNArchName(
+ CUdevice device, std::string* gcnArchName) {
+ return port::Status{
+ port::error::INTERNAL,
+ "Feature not supported on CUDA platform (GetGpuGCNArchName)"};
+}
+
// Helper function that turns the integer output of cuDeviceGetAttribute to type
// T and wraps it in a StatusOr.
template <typename T>
diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc
index 130e2e6..4ffe02e 100644
--- a/tensorflow/stream_executor/device_description.cc
+++ b/tensorflow/stream_executor/device_description.cc
@@ -51,6 +51,7 @@
cuda_compute_capability_major_(-1),
cuda_compute_capability_minor_(-1),
rocm_amdgpu_isa_version_(-1),
+ rocm_amdgpu_gcn_arch_name_(kUndefinedString),
numa_node_(-1),
core_count_(-1),
ecc_enabled_(false) {}
@@ -95,6 +96,8 @@
result["CUDA Compute Capability"] = absl::StrCat(
cuda_compute_capability_major_, ".", cuda_compute_capability_minor_);
+ result["AMDGPU GCN Arch Name"] = absl::StrCat(rocm_amdgpu_gcn_arch_name_);
+
result["NUMA Node"] = absl::StrCat(numa_node());
result["Core Count"] = absl::StrCat(core_count());
result["ECC Enabled"] = absl::StrCat(ecc_enabled());
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index fa7426e..fef4be4 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -138,6 +138,13 @@
// and the return value will be false.
bool rocm_amdgpu_isa_version(int *version) const;
+ // Returns the
+ // * AMDGPU GCN Architecture Name if we're running on the ROCm platform.
+ // * kUndefinedString otherwise
+ const std::string rocm_amdgpu_gcn_arch_name() const {
+ return rocm_amdgpu_gcn_arch_name_;
+ }
+
// Returns the maximum amount of shared memory present on a single core
// (i.e. Streaming Multiprocessor on NVIDIA GPUs; Compute Unit for OpenCL
// devices). Note that some devices, such as NVIDIA's have a configurable
@@ -203,6 +210,9 @@
// ROCM AMDGPU ISA version, 0 if not available.
int rocm_amdgpu_isa_version_;
+ // ROCm AMDGPU GCN Architecture name, kUndefinedString if not available.
+ std::string rocm_amdgpu_gcn_arch_name_;
+
int numa_node_;
int core_count_;
bool ecc_enabled_;
@@ -294,6 +304,10 @@
device_description_->rocm_amdgpu_isa_version_ = version;
}
+ void set_rocm_amdgpu_gcn_arch_name(const std::string& gcn_arch_name) {
+ device_description_->rocm_amdgpu_gcn_arch_name_ = gcn_arch_name;
+ }
+
void set_numa_node(int value) { device_description_->numa_node_ = value; }
void set_core_count(int value) { device_description_->core_count_ = value; }
void set_ecc_enabled(bool value) {
diff --git a/tensorflow/stream_executor/gpu/gpu_driver.h b/tensorflow/stream_executor/gpu/gpu_driver.h
index 3cd13dc..955ed59 100644
--- a/tensorflow/stream_executor/gpu/gpu_driver.h
+++ b/tensorflow/stream_executor/gpu/gpu_driver.h
@@ -460,6 +460,12 @@
// (supported on ROCm only)
static port::Status GetGpuISAVersion(int* version, GpuDeviceHandle device);
+ // Return the full GCN Architecture Name for the device
+ // for eg: amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-
+ // (supported on ROCm only)
+ static port::Status GetGpuGCNArchName(GpuDeviceHandle device,
+ std::string* gcnArchName);
+
// Returns the number of multiprocessors on the device (note that the device
// may be multi-GPU-per-board).
static port::StatusOr<int> GetMultiprocessorCount(GpuDeviceHandle device);
diff --git a/tensorflow/stream_executor/rocm/rocm_driver.cc b/tensorflow/stream_executor/rocm/rocm_driver.cc
index a070979..f7be297 100644
--- a/tensorflow/stream_executor/rocm/rocm_driver.cc
+++ b/tensorflow/stream_executor/rocm/rocm_driver.cc
@@ -1080,6 +1080,21 @@
device)};
}
+/* static */ port::Status GpuDriver::GetGpuGCNArchName(
+ hipDevice_t device, std::string* gcnArchName) {
+ hipDeviceProp_t props;
+ hipError_t result = tensorflow::wrap::hipGetDeviceProperties(&props, device);
+ if (result == hipSuccess) {
+ *gcnArchName = props.gcnArchName;
+ return port::Status::OK();
+ }
+ *gcnArchName = "";
+ return port::Status{
+ port::error::INTERNAL,
+ absl::StrFormat("failed to determine AMDGpu GCN Arch Name for device %d",
+ device)};
+}
+
// Helper function that turns the integer output of hipDeviceGetAttribute to
// type T and wraps it in a StatusOr.
template <typename T>
diff --git a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc
index dbab030..3926aee 100644
--- a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc
+++ b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc
@@ -820,6 +820,12 @@
return status;
}
+ string gcn_arch_name;
+ status = GpuDriver::GetGpuGCNArchName(device, &gcn_arch_name);
+ if (!status.ok()) {
+ return status;
+ }
+
internal::DeviceDescriptionBuilder builder;
{
@@ -888,7 +894,7 @@
}
builder.set_platform_version(
- absl::StrCat("AMDGPU ISA version: gfx", version));
+ absl::StrCat("AMDGPU ISA version: ", gcn_arch_name));
// TODO(leary) should be a way to query this from the driver, but this is
// unlikely to change for us any time soon.
@@ -896,6 +902,8 @@
builder.set_device_vendor("Advanced Micro Devices, Inc");
builder.set_rocm_amdgpu_isa_version(version);
+ builder.set_rocm_amdgpu_gcn_arch_name(gcn_arch_name);
+
builder.set_shared_memory_per_core(
GpuDriver::GetMaxSharedMemoryPerCore(device).ValueOrDie());
builder.set_shared_memory_per_block(
diff --git a/tensorflow/stream_executor/tpu/c_api_decl.h b/tensorflow/stream_executor/tpu/c_api_decl.h
index 71a725f..95af730 100644
--- a/tensorflow/stream_executor/tpu/c_api_decl.h
+++ b/tensorflow/stream_executor/tpu/c_api_decl.h
@@ -140,6 +140,7 @@
int cuda_compute_capability_minor;
int rocm_amdgpu_isa_version;
+ char* rocm_amdgpu_gcn_arch_name;
int numa_node;
int core_count;