Set the correct GPU context in the continuation used by the Unique op
PiperOrigin-RevId: 364824284
Change-Id: I2ef887bb9f44bdbafe7ab25e72632742c5ab0242
diff --git a/tensorflow/core/kernels/unique_op_gpu.cu.cc b/tensorflow/core/kernels/unique_op_gpu.cu.cc
index 0e9d4bd..9113e9d 100644
--- a/tensorflow/core/kernels/unique_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/unique_op_gpu.cu.cc
@@ -30,7 +30,9 @@
#if GOOGLE_CUDA
#include "tensorflow/core/util/cuda_solvers.h" // For ScratchSpace
+#include "tensorflow/stream_executor/cuda/cuda_activation.h"
#elif TENSORFLOW_USE_ROCM
+#include "tensorflow/core/platform/rocm.h"
#include "tensorflow/core/util/rocm_solvers.h"
#endif
@@ -326,6 +328,9 @@
const GPUDevice& device = context->eigen_gpu_device();
int64 uniq_size = (*last_idx_host.data()) + 1;
+ se::cuda::ScopedActivateExecutorContext scoped_activation{
+ context->op_device_context()->stream()->parent()};
+
Tensor unique_input_inds;
TIndex* unique_input_inds_ptr = nullptr;
AllocateTemp(context, uniq_size, &unique_input_inds,