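Addressing PR feedback

Add tf_profiler_copts() to the rocm_tracer copts, and split the
DeviceTracerTest.TraceToXSpace test into separate ROCm and non-ROCm
variants selected by TENSORFLOW_USE_ROCM.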
diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD
index 98d028b..074db67 100644
--- a/tensorflow/core/profiler/internal/gpu/BUILD
+++ b/tensorflow/core/profiler/internal/gpu/BUILD
@@ -144,7 +144,7 @@
     name = "rocm_tracer",
     srcs = if_rocm(["rocm_tracer.cc"]),
     hdrs = if_rocm(["rocm_tracer.h"]),
-    copts = tf_copts(),
+    copts = tf_profiler_copts() + tf_copts(),
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/core:lib",
diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc b/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc
index d70b902..3ba5a92 100644
--- a/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc
+++ b/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc
@@ -243,6 +243,7 @@
   EXPECT_GE(run_metadata.step_stats().dev_stats_size(), 1);
 }
 
+#if TENSORFLOW_USE_ROCM
 TEST_F(DeviceTracerTest, TraceToXSpace) {
   auto tracer = CreateGpuTracer();
   if (!tracer) return;
@@ -266,13 +267,8 @@
   XSpace space;
   TF_ASSERT_OK(tracer->CollectData(&space));
   // At least one gpu plane and one host plane for launching events.
-#if GOOGLE_CUDA
-  const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName);
-  ASSERT_NE(host_plane, nullptr);
-#elif TENSORFLOW_USE_ROCM
   const XPlane* host_plane = FindPlaneWithName(space, kRoctracerApiPlaneName);
   ASSERT_NE(host_plane, nullptr);
-#endif
 
   const XPlane* device_plane =
       FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0));
@@ -283,14 +279,6 @@
   EXPECT_GE(device_plane->event_metadata_size(), 5);
   // Check if device capacity is serialized.
   XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane);
-#if GOOGLE_CUDA
-  EXPECT_TRUE(plane.GetStat(kDevCapClockRateKHz).has_value());
-  EXPECT_TRUE(plane.GetStat(kDevCapCoreCount).has_value());
-  EXPECT_TRUE(plane.GetStat(kDevCapMemoryBandwidth).has_value());
-  EXPECT_TRUE(plane.GetStat(kDevCapMemorySize).has_value());
-  EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMajor).has_value());
-  EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMinor).has_value());
-#endif
 
   // Check if the device events timestamps are set.
   int total_events = 0;
@@ -303,6 +291,60 @@
   });
   EXPECT_GE(total_events, 5);
 }
+#else  // TENSORFLOW_USE_ROCM
+TEST_F(DeviceTracerTest, TraceToXSpace) {  // Variant for non-ROCm builds.
+  auto tracer = CreateGpuTracer();
+  if (!tracer) return;  // No tracer was created: nothing to verify.
+  // Set up the session; Initialize() presumably populates def_ (confirm).
+  Initialize({3, 2, -1, 0});
+  auto session = CreateSession();
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def_));
+  std::vector<std::pair<string, Tensor>> inputs;  // Run() gets no feeds.
+  // Request two targets: one fetched output (y_ + ":0") and one node that is
+  // passed as a target only, so it is run but not fetched (y_neg_).
+  std::vector<string> output_names = {y_ + ":0"};
+  std::vector<string> target_nodes = {y_neg_};
+  std::vector<Tensor> outputs;
+  // Trace only the Run() call: start right before it, stop right after it.
+  TF_ASSERT_OK(tracer->Start());
+  Status s = session->Run(inputs, output_names, target_nodes, &outputs);
+  TF_ASSERT_OK(s);
+
+  TF_ASSERT_OK(tracer->Stop());
+  XSpace space;
+  TF_ASSERT_OK(tracer->CollectData(&space));
+  // Expect at least one GPU plane and one host plane for launching events.
+  const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName);
+  ASSERT_NE(host_plane, nullptr);
+  // The device plane is looked up as kGpuPlanePrefix followed by index 0.
+  const XPlane* device_plane =
+      FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0));
+  ASSERT_NE(device_plane, nullptr);  // Device plane must be serialized.
+  // Expect 5 metadata entries: MemcpyH2D, MemcpyD2H, two for Matmul (one from
+  // Eigen, one from cudnn) and one for memset.
+  EXPECT_EQ(device_plane->event_metadata_size(), 5);
+  // Check that the device capability (kDevCap*) stats are serialized.
+  XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane);
+  EXPECT_TRUE(plane.GetStat(kDevCapClockRateKHz).has_value());
+  EXPECT_TRUE(plane.GetStat(kDevCapCoreCount).has_value());
+  EXPECT_TRUE(plane.GetStat(kDevCapMemoryBandwidth).has_value());
+  EXPECT_TRUE(plane.GetStat(kDevCapMemorySize).has_value());
+  EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMajor).has_value());
+  EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMinor).has_value());
+
+  // Every device event must carry a positive timestamp and duration.
+  int total_events = 0;  // Counted across all lines of the device plane.
+  plane.ForEachLine([&](const tensorflow::profiler::XLineVisitor& line) {
+    line.ForEachEvent([&](const tensorflow::profiler::XEventVisitor& event) {
+      EXPECT_GT(event.TimestampNs(), 0);
+      EXPECT_GT(event.DurationNs(), 0);
+      ++total_events;
+    });
+  });
+  EXPECT_GE(total_events, 5);
+}
+#endif  // TENSORFLOW_USE_ROCM
 
 #if GOOGLE_CUDA
 TEST_F(DeviceTracerTest, CudaRuntimeResource) {