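Set the correct GPU context in the continuation used in Unique

The continuation now creates a ScopedActivateExecutorContext for the
op's stream executor (context->op_device_context()->stream()->parent())
before it allocates its temporary tensors.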

PiperOrigin-RevId: 364824284
Change-Id: I2ef887bb9f44bdbafe7ab25e72632742c5ab0242
diff --git a/tensorflow/core/kernels/unique_op_gpu.cu.cc b/tensorflow/core/kernels/unique_op_gpu.cu.cc
index 0e9d4bd..9113e9d 100644
--- a/tensorflow/core/kernels/unique_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/unique_op_gpu.cu.cc
@@ -30,7 +30,9 @@
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/util/cuda_solvers.h"  // For ScratchSpace
+#include "tensorflow/stream_executor/cuda/cuda_activation.h"
 #elif TENSORFLOW_USE_ROCM
+#include "tensorflow/core/platform/rocm.h"
 #include "tensorflow/core/util/rocm_solvers.h"
 #endif
 
@@ -326,6 +328,9 @@
       const GPUDevice& device = context->eigen_gpu_device();
       int64 uniq_size = (*last_idx_host.data()) + 1;
 
+      se::cuda::ScopedActivateExecutorContext scoped_activation{
+          context->op_device_context()->stream()->parent()};
+
       Tensor unique_input_inds;
       TIndex* unique_input_inds_ptr = nullptr;
       AllocateTemp(context, uniq_size, &unique_input_inds,