Changed GPUOperation interface.
AddToQueue changed to a non-virtual method.
New virtual methods BindArguments and GetGridSize.
Using the default Tune method for the majority of ops.
PiperOrigin-RevId: 321427369
Change-Id: I7186945a1f9e744c9ea6ec0c8d29612622845c77
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
index 7ff10f1..9e57dd1 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@@ -197,6 +197,7 @@
RETURN_IF_ERROR(AllocateMemory(env->device(), creation_context.context));
BindMemoryToOperations();
RETURN_IF_ERROR(Compile(creation_context));
+ RETURN_IF_ERROR(UpdateParams());
TuningParameters tuning_parameters;
tuning_parameters.queue = env->profiling_queue();
@@ -554,6 +555,13 @@
return absl::OkStatus();
}
+absl::Status InferenceContext::UpdateParams() {
+ for (auto& node : nodes_) {
+ RETURN_IF_ERROR(node.operations[0]->UpdateParams());
+ }
+ return absl::OkStatus();
+}
+
absl::Status InferenceContext::AddToQueue(CLCommandQueue* queue) {
if (need_manual_release_) {
if (prev_enqueue_start_point_.is_valid()) {
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h
index 7536525..3f05026 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
@@ -114,6 +114,7 @@
void BindMemoryToOperations();
absl::Status Compile(const CreationContext& creation_context);
absl::Status Tune(const TuningParameters& tuning_parameters);
+ absl::Status UpdateParams();
// performance hacks
bool need_flush_ = false;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc
index deb0ebf..f864a73 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc
@@ -56,6 +56,7 @@
}
RETURN_IF_ERROR(operation->Compile(creation_context));
+ RETURN_IF_ERROR(operation->UpdateParams());
RETURN_IF_ERROR(operation->AddToQueue(creation_context.queue));
RETURN_IF_ERROR(creation_context.queue->WaitForCompletion());
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc
index 0a84d8a..9feb3ac 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc
@@ -154,9 +154,7 @@
RETURN_IF_ERROR(
args_.SetObjectRef("src_tensor_" + std::to_string(i), src_[i]));
}
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
}
int3 ConcatXY::GetGridSize() const {
@@ -166,16 +164,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status ConcatXY::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status ConcatXY::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
ConcatXY CreateConcatXY(const OperationDef& definition,
const ConcatAttributes& attr, int tensors_count) {
return ConcatXY(definition, attr, tensors_count);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h
index a82ffb2..011d8fb 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h
@@ -31,10 +31,9 @@
ConcatXY(const OperationDef& definition, const ConcatAttributes& attr,
int tensors_count)
: GPUOperation(definition), attr_(attr), tensors_count_(tensors_count) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConcatXY(ConcatXY&& operation);
@@ -43,9 +42,6 @@
ConcatXY& operator=(const ConcatXY&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
ConcatAttributes attr_;
int tensors_count_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc
index 93bc7b4..7878919 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc
@@ -176,9 +176,7 @@
RETURN_IF_ERROR(
args_.SetObjectRef("src_tensor_" + std::to_string(i), src_[i]));
}
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
}
int3 ConcatZ::GetGridSize() const {
@@ -188,16 +186,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status ConcatZ::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status ConcatZ::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
ConcatZ CreateConcatZ(const OperationDef& definition,
const std::vector<int>& channels) {
return ConcatZ(definition, channels);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h
index 6595432..496b943 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h
@@ -32,10 +32,9 @@
public:
ConcatZ(const OperationDef& definition, const std::vector<int>& channels)
: GPUOperation(definition), channels_(channels) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConcatZ(ConcatZ&& kernel);
@@ -44,9 +43,6 @@
ConcatZ& operator=(const ConcatZ&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
std::vector<int> channels_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc
index 1d9eaef..788b56c 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc
@@ -67,6 +67,7 @@
definition_.IsBatchSupported() && stride_.x != 1;
std::string code =
GenerateConv3D(definition_, stride_correction, conv_params_, &args_);
+ work_group_size_ = conv_params_.work_group_size;
std::string element_wise_code;
RETURN_IF_ERROR(
MergeOperations(linked_operations_, &args_, &element_wise_code));
@@ -105,11 +106,8 @@
RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z));
RETURN_IF_ERROR(args_.SetInt("dilation_z", dilation_.z));
}
- RETURN_IF_ERROR(args_.SetInt(
- "grid_size_s",
- DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w)));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetInt("grid_size_s", DivideRoundUp(dst_[0]->Slices(),
+ conv_params_.block_size.w));
}
int3 Conv3D::GetGridSize() const {
@@ -142,19 +140,14 @@
if (conv_params_.work_group_launch_order[0] == 0 &&
conv_params_.work_group_launch_order[1] == 1 &&
conv_params_.work_group_launch_order[2] == 2) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
- &conv_params_.work_group_size);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_,
+ &conv_params_.work_group_size));
+ work_group_size_ = conv_params_.work_group_size;
}
return absl::OkStatus();
}
-absl::Status Conv3D::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(),
- conv_params_.work_group_size);
-}
-
namespace {
std::string GenerateUploadByThreads(const std::string& local_ptr_name,
const std::string& global_ptr_name,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h
index 501aa05..720f1ed 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h
@@ -39,9 +39,10 @@
class Conv3D : public GPUOperation {
public:
Conv3D() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
Conv3D(Conv3D&& operation);
@@ -105,9 +106,6 @@
int dst_slices, bool x_kernel_is_1,
bool y_kernel_is_1, bool z_kernel_is_1) const;
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int3 stride_;
int3 padding_;
int3 kernel_size_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
index 6fab26a..9007155 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
@@ -293,6 +293,7 @@
absl::Status ConvBuffer1x1::Compile(const CreationContext& creation_context) {
std::string code = GenerateConvBuffer1x1(definition_, conv_params_, &args_);
+ work_group_size_ = conv_params_.work_group_size;
std::string element_wise_code;
RETURN_IF_ERROR(
MergeOperations(linked_operations_, &args_, &element_wise_code));
@@ -310,9 +311,7 @@
RETURN_IF_ERROR(args_.SetObjectRef("weights", src_[1]));
}
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
}
int3 ConvBuffer1x1::GetGridSize() const {
@@ -328,15 +327,11 @@
}
absl::Status ConvBuffer1x1::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
- &conv_params_.work_group_size);
-}
-
-absl::Status ConvBuffer1x1::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(),
- conv_params_.work_group_size);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_,
+ &conv_params_.work_group_size));
+ work_group_size_ = conv_params_.work_group_size;
+ return absl::OkStatus();
}
bool IsConvBuffer1x1Supported(const OperationDef& definition,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h
index 1be023f..9f549d3 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h
@@ -47,9 +47,10 @@
ConvBuffer1x1(const ConvBuffer1x1&) = delete;
ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
ConvWeightsDescription GetConvWeightsDescription() const {
ConvWeightsDescription desc;
@@ -106,9 +107,6 @@
absl::Status UploadBiases(const tflite::gpu::Tensor<Linear, T>& biases,
CLContext* context);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
ConvParams conv_params_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
index e2d0e82..83c4300 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
@@ -251,9 +251,7 @@
RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch()));
RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y));
RETURN_IF_ERROR(args_.SetInt("dilation_x", dilation_.x * src_[0]->Batch()));
- RETURN_IF_ERROR(args_.SetInt("dilation_y", dilation_.y));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetInt("dilation_y", dilation_.y);
}
int3 ConvConstants::GetGridSize() const {
@@ -262,16 +260,6 @@
return int3(grid_x, grid_y, 1);
}
-absl::Status ConvConstants::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status ConvConstants::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
bool IsConvConstantsSupported(const CLDevice& device,
const OperationDef& definition,
const Convolution2DAttributes& attr) {
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h
index f3f0025..d434af0 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h
@@ -35,10 +35,10 @@
class ConvConstants : public GPUOperation {
public:
ConvConstants() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvConstants(ConvConstants&& kernel);
@@ -68,9 +68,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int2 kernel_size_;
int2 stride_;
int2 padding_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
index 551f5f3..76ae58a 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
@@ -184,6 +184,7 @@
definition_.IsBatchSupported() && stride_padding_.x != 1;
std::string code = GenerateConv(*creation_context.device, definition_,
stride_correction, conv_params_, &args_);
+ work_group_size_ = conv_params_.work_group_size;
std::string element_wise_code;
RETURN_IF_ERROR(
MergeOperations(linked_operations_, &args_, &element_wise_code));
@@ -226,8 +227,6 @@
conv_params_.block_size.x);
RETURN_IF_ERROR(args_.SetInt("task_size_x", grid_x));
}
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
return absl::OkStatus();
}
@@ -272,19 +271,14 @@
if (conv_params_.work_group_launch_order[0] == 0 &&
conv_params_.work_group_launch_order[1] == 1 &&
conv_params_.work_group_launch_order[2] == 2) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
- &conv_params_.work_group_size);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_,
+ &conv_params_.work_group_size));
+ work_group_size_ = conv_params_.work_group_size;
}
return absl::OkStatus();
}
-absl::Status ConvPowerVR::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(),
- conv_params_.work_group_size);
-}
-
std::string GenerateConv(const CLDevice& device, const OperationDef& op_def,
bool stride_correction,
const ConvPowerVR::ConvParams& conv_params,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
index 07bcf2c..8ef8bc6 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
@@ -41,9 +41,10 @@
class ConvPowerVR : public GPUOperation {
public:
ConvPowerVR() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
ConvWeightsDescription GetConvWeightsDescription() const {
ConvWeightsDescription desc;
@@ -205,9 +206,6 @@
bool different_weights_for_height,
const BHWC* dst_shape = nullptr) const;
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int4 stride_padding_;
int4 kernel_dilation_;
ConvParams conv_params_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
index d81c7e8..a31674d 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
@@ -420,8 +420,6 @@
RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y));
RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch()));
RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
return absl::OkStatus();
}
@@ -434,14 +432,8 @@
}
absl::Status ConvTexture::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
- &work_group_size_);
-}
-
-absl::Status ConvTexture::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_);
}
absl::Status CreateConvTexture(const CreationContext& creation_context,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h
index c21d5b1..80a328e 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h
@@ -42,10 +42,10 @@
class ConvTexture : public GPUOperation {
public:
ConvTexture() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvTexture(ConvTexture&& operation);
@@ -89,9 +89,6 @@
absl::Span<T> dst_0, absl::Span<T> dst_1,
absl::Span<T> dst_2, absl::Span<T> dst_3);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int2 kernel_size_;
int2 stride_;
int2 padding_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc
index 063b20e..ce97311 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc
@@ -127,9 +127,7 @@
RETURN_IF_ERROR(args_.SetFloat("mask_x", mask.x));
RETURN_IF_ERROR(args_.SetFloat("mask_y", mask.y));
RETURN_IF_ERROR(args_.SetFloat("mask_z", mask.z));
- RETURN_IF_ERROR(args_.SetFloat("mask_w", mask.w));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetFloat("mask_w", mask.w);
}
int3 ConverterToConvWeights::GetGridSize() const {
@@ -140,16 +138,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status ConverterToConvWeights::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status ConverterToConvWeights::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
ConverterToConvWeights CreateConverterToConvWeights(
const OperationDef& definition,
const ConvWeightsDescription& conv_weights_desc) {
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h
index 3bf17fa..d8d84b8 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h
@@ -32,10 +32,9 @@
ConverterToConvWeights(const OperationDef& definition,
const ConvWeightsDescription& conv_weights_desc)
: GPUOperation(definition), conv_weights_desc_(conv_weights_desc) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConverterToConvWeights(ConverterToConvWeights&& operation);
@@ -44,9 +43,6 @@
ConverterToConvWeights& operator=(const ConverterToConvWeights&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
ConvWeightsDescription conv_weights_desc_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
index 85456fc..dc146c4 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
@@ -362,9 +362,7 @@
RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x));
RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y));
RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x));
- RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetInt("kernel_size_y", kernel_size_.y);
}
int3 ConvolutionTransposed::GetGridSize() const {
@@ -377,14 +375,8 @@
}
absl::Status ConvolutionTransposed::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
- &work_group_size_);
-}
-
-absl::Status ConvolutionTransposed::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_);
}
absl::Status CreateConvolutionTransposed(
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h
index cf70799..fc53884 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h
@@ -38,10 +38,10 @@
class ConvolutionTransposed : public GPUOperation {
public:
ConvolutionTransposed() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvolutionTransposed(ConvolutionTransposed&& operation);
@@ -65,9 +65,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
bool weights_are_buffer_;
int2 kernel_size_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc
index 53f24cb..409f7e3 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc
@@ -399,10 +399,8 @@
RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x));
RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y));
RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z));
- RETURN_IF_ERROR(args_.SetInt(
- "grid_size_s", DivideRoundUp(dst_[0]->Slices(), block_size_.w)));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetInt("grid_size_s",
+ DivideRoundUp(dst_[0]->Slices(), block_size_.w));
}
int3 ConvolutionTransposed3D::GetGridSize() const {
@@ -417,14 +415,8 @@
}
absl::Status ConvolutionTransposed3D::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroupConv(params, kernel_, GetGridSize(),
- &work_group_size_);
-}
-
-absl::Status ConvolutionTransposed3D::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_);
}
absl::Status CreateConvolutionTransposed3D(
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h
index 4b76e61..09f7e70 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h
@@ -38,10 +38,10 @@
class ConvolutionTransposed3D : public GPUOperation {
public:
ConvolutionTransposed3D() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvolutionTransposed3D(ConvolutionTransposed3D&& operation);
@@ -65,9 +65,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
bool weights_are_buffer_;
int3 kernel_size_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc
index 0da4ca6..9446f0f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc
@@ -333,9 +333,7 @@
const int padding_y =
padding_.y >= 1 ? (padding_.y - 1) / 2 : (padding_.y - 2) / 2;
RETURN_IF_ERROR(args_.SetInt("padding_x", padding_x * src_[0]->Batch()));
- RETURN_IF_ERROR(args_.SetInt("padding_y", padding_y));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetInt("padding_y", padding_y);
}
int3 ConvolutionTransposed3x3::GetGridSize() const {
@@ -349,12 +347,6 @@
return int3(wg[work_group_launch_order_[0]] * work_group_size_.x,
wg[work_group_launch_order_[1]] * work_group_size_.y,
wg[work_group_launch_order_[2]] * work_group_size_.z);
- return int3(grid_x, grid_y, grid_z);
-}
-
-absl::Status ConvolutionTransposed3x3::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
}
bool IsConvolutionTransposed3x3Supported(
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h
index 3792acd..0dc42a7 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h
@@ -37,8 +37,12 @@
class ConvolutionTransposed3x3 : public GPUOperation {
public:
ConvolutionTransposed3x3() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
+ absl::Status Tune(const TuningParameters& params) override {
+ return absl::OkStatus();
+ }
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvolutionTransposed3x3(ConvolutionTransposed3x3&& operation);
@@ -68,9 +72,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int2 padding_;
int3 work_group_launch_order_;
WeightsUploadType weights_upload_type_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc
index 934c719..56a21cb 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc
@@ -207,9 +207,7 @@
absl::Status ConvolutionTransposed3x3Thin::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
}
int3 ConvolutionTransposed3x3Thin::GetGridSize() const {
@@ -219,17 +217,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status ConvolutionTransposed3x3Thin::Tune(
- const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status ConvolutionTransposed3x3Thin::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
bool IsConvolutionTransposed3x3ThinSupported(
const CLDevice& device, const ConvolutionTransposedAttributes& attr) {
return attr.weights.shape.o <= 8 && attr.weights.shape.w == 3 &&
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h
index 2e27283..282f1b3 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h
@@ -37,10 +37,9 @@
class ConvolutionTransposed3x3Thin : public GPUOperation {
public:
ConvolutionTransposed3x3Thin() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvolutionTransposed3x3Thin(ConvolutionTransposed3x3Thin&& operation);
@@ -67,9 +66,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int src_channels_;
int dst_channels_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc
index 6c81457..d7660fc 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc
@@ -318,9 +318,7 @@
absl::Status ConvolutionTransposed4x4::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(args_.SetInt("filter_offset", 4 * 16 * src_[0]->Slices()));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetInt("filter_offset", 4 * 16 * src_[0]->Slices());
}
int3 ConvolutionTransposed4x4::GetGridSize() const {
@@ -330,11 +328,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status ConvolutionTransposed4x4::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
bool IsConvolutionTransposed4x4Supported(
const CLDevice& device, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr) {
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h
index 1cf3b83..9829374 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h
@@ -37,8 +37,12 @@
class ConvolutionTransposed4x4 : public GPUOperation {
public:
ConvolutionTransposed4x4() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
+ absl::Status Tune(const TuningParameters& params) override {
+ return absl::OkStatus();
+ }
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvolutionTransposed4x4(ConvolutionTransposed4x4&& operation);
@@ -68,9 +72,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
WeightsUploadType weights_upload_type_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc
index 90b1a4c..5b31c98 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc
@@ -183,9 +183,7 @@
absl::Status ConvolutionTransposedThin::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
}
int3 ConvolutionTransposedThin::GetGridSize() const {
@@ -195,16 +193,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status ConvolutionTransposedThin::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status ConvolutionTransposedThin::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
bool IsConvolutionTransposedThinSupported(
const CLDevice& device, const ConvolutionTransposedAttributes& attr) {
return attr.weights.shape.o <= 4 && attr.weights.shape.w == attr.stride.w &&
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h
index bb06202..90a1b02 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h
@@ -37,10 +37,9 @@
class ConvolutionTransposedThin : public GPUOperation {
public:
ConvolutionTransposedThin() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ConvolutionTransposedThin(ConvolutionTransposedThin&& operation);
@@ -65,9 +64,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int2 kernel_size_;
int src_channels_;
int dst_channels_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc
index 82658d6..7d6bee68 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc
@@ -306,8 +306,7 @@
if (!IsSpecializedCase(channel_multiplier_)) {
RETURN_IF_ERROR(args_.SetInt("ch_multiplier", channel_multiplier_));
}
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 DepthwiseConvolution::GetGridSize() const {
@@ -317,16 +316,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status DepthwiseConvolution::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status DepthwiseConvolution::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
absl::Status CreateDepthwiseConvolution(
const CreationContext& creation_context, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h
index 6433e8d..51cf68a 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h
@@ -38,10 +38,9 @@
class DepthwiseConvolution : public GPUOperation {
public:
DepthwiseConvolution() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
DepthwiseConvolution(DepthwiseConvolution&& operation);
@@ -81,9 +80,6 @@
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
bool weights_are_buffer_;
int4 kernel_size_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc
index 0494038..97afea4 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc
@@ -303,9 +303,7 @@
absl::Status DepthwiseConv3x3::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
}
int3 DepthwiseConv3x3::GetGridSize() const {
@@ -319,15 +317,10 @@
if (local_mem_uploads_) {
return absl::OkStatus();
}
- RETURN_IF_ERROR(BindArguments());
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
}
-absl::Status DepthwiseConv3x3::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) {
return attr.weights.shape.o == 1 && attr.dilations.w == 1 &&
attr.dilations.h == 1 && attr.weights.shape.w == 3 &&
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h
index fd1dca4..ce5b2d8 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h
@@ -38,10 +38,10 @@
class DepthwiseConv3x3 : public GPUOperation {
public:
DepthwiseConv3x3() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
absl::Status Tune(const TuningParameters& params) override;
-
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
DepthwiseConv3x3(DepthwiseConv3x3&& operation);
@@ -66,9 +66,6 @@
const tflite::gpu::Tensor<OHWI, S>& weights,
const tflite::gpu::Tensor<Linear, S>& biases, absl::Span<T> dst);
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
bool weights_are_buffer_;
bool local_mem_uploads_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc
index 1685d4f..944af0a 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc
@@ -129,13 +129,13 @@
return absl::OkStatus();
}
-absl::Status FullyConnected::AddToQueue(CLCommandQueue* queue) {
+absl::Status FullyConnected::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
- RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
- return queue->DispatchImplicit(kernel_, {dst_[0]->Slices(), 1, 1},
- work_group_size_);
+ return args_.SetObjectRef("dst_tensor", dst_[0]);
+}
+
+int3 FullyConnected::GetGridSize() const {
+ return int3(dst_[0]->Slices(), 1, 1);
}
absl::Status CreateFullyConnected(const CreationContext& creation_context,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h
index 2adff4f..138db00 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h
@@ -37,8 +37,11 @@
class FullyConnected : public GPUOperation {
public:
FullyConnected() = default;
- absl::Status AddToQueue(CLCommandQueue* queue) override;
-
+ absl::Status Tune(const TuningParameters& params) override {
+ return absl::OkStatus();
+ }
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
index 2310ee5..d0d1f88 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
@@ -125,6 +125,7 @@
args_(std::move(operation.args_)),
kernel_(std::move(operation.kernel_)),
work_group_size_(operation.work_group_size_),
+ grid_size_(operation.grid_size_),
linked_operations_(std::move(operation.linked_operations_)) {}
GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
@@ -135,6 +136,7 @@
args_ = std::move(operation.args_);
kernel_ = std::move(operation.kernel_);
std::swap(work_group_size_, operation.work_group_size_);
+ std::swap(grid_size_, operation.grid_size_);
linked_operations_ = std::move(operation.linked_operations_);
}
return *this;
@@ -162,10 +164,7 @@
absl::Status ElementwiseOperation::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArgs("", &args_));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
- return absl::OkStatus();
+ return SetArgs("", &args_);
}
int3 ElementwiseOperation::GetGridSize() const {
@@ -192,16 +191,6 @@
*creation_context.device, &kernel_);
}
-absl::Status ElementwiseOperation::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
-absl::Status ElementwiseOperation::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
absl::Status MergeOperations(
const std::vector<ElementwiseOperation*>& linked_ops,
Arguments* merged_args, std::string* merged_code) {
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
index 34d6d8c..88d0ff0 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
@@ -24,6 +24,7 @@
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
#include "tensorflow/lite/delegates/gpu/cl/precision.h"
#include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
@@ -59,6 +60,9 @@
class ElementwiseOperation;
+absl::Status SetArguments(const std::vector<ElementwiseOperation*>& linked_ops,
+ Arguments* args);
+
// GPUOperation represents some implementation of neural network operation on
// GPU. GPUOperation can contain ElementwiseOperation operations, in this case,
// ElementwiseOperation still hold necessary data and should be alive.
@@ -86,11 +90,22 @@
void SetSrc(Tensor* ptr, int index = 0);
void SetDst(Tensor* ptr, int index = 0);
- virtual absl::Status AddToQueue(CLCommandQueue* queue) {
+ // should be called after changes of inputs/outputs.
+ absl::Status UpdateParams() {
+ RETURN_IF_ERROR(BindArguments());
+ RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
+ grid_size_ = GetGridSize();
return absl::OkStatus();
}
+
+ absl::Status AddToQueue(CLCommandQueue* queue) {
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return queue->DispatchImplicit(kernel_, grid_size_, work_group_size_);
+ }
+
virtual absl::Status Tune(const TuningParameters& params) {
- return absl::OkStatus();
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_);
}
virtual absl::Status Compile(const CreationContext& creation_context) {
@@ -100,6 +115,9 @@
const OperationDef& GetDefinition() const { return definition_; }
protected:
+ virtual absl::Status BindArguments() = 0;
+ virtual int3 GetGridSize() const = 0;
+
// Defines operation calculation precision and format of src/dst tensors.
OperationDef definition_;
std::vector<Tensor*> src_;
@@ -107,6 +125,7 @@
Arguments args_;
CLKernel kernel_;
int3 work_group_size_ = int3(8, 4, 1);
+ int3 grid_size_ = int3(0, 0, 0);
std::vector<ElementwiseOperation*> linked_operations_;
};
@@ -124,10 +143,10 @@
: GPUOperation(definition) {}
virtual ~ElementwiseOperation() {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
// Move only
ElementwiseOperation(ElementwiseOperation&& operation);
@@ -149,17 +168,12 @@
protected:
bool check_src_channels_size_ = false;
std::string code_;
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
absl::Status MergeOperations(
const std::vector<ElementwiseOperation*>& linked_ops,
Arguments* merged_args, std::string* merged_code);
-absl::Status SetArguments(const std::vector<ElementwiseOperation*>& linked_ops,
- Arguments* args);
-
} // namespace cl
} // namespace gpu
} // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc
index 66d6b3d..ab61fcb 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc
@@ -125,7 +125,7 @@
RETURN_IF_ERROR(args_.SetObjectRef("prev_state", src_[1]));
RETURN_IF_ERROR(args_.SetObjectRef("new_state", dst_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("activation", dst_[1]));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 LSTM::GetGridSize() const {
@@ -135,16 +135,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status LSTM::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status LSTM::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
LSTM CreateLSTM(const OperationDef& definition) { return LSTM(definition); }
} // namespace cl
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h
index 5310e19..6490f39 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h
@@ -28,8 +28,8 @@
class LSTM : public GPUOperation {
public:
explicit LSTM(const OperationDef& definition);
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -37,10 +37,6 @@
LSTM& operator=(LSTM&& kernel);
LSTM(const LSTM&) = delete;
LSTM& operator=(const LSTM&) = delete;
-
- private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
LSTM CreateLSTM(const OperationDef& definition);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
index 58ace72..bef4c26 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
@@ -205,8 +205,7 @@
RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z));
RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z));
}
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 MaxUnpooling::GetGridSize() const {
@@ -216,16 +215,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status MaxUnpooling::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status MaxUnpooling::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
MaxUnpooling CreateMaxUnpooling(const OperationDef& definition,
const MaxUnpooling2DAttributes& attr) {
return MaxUnpooling(definition, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h
index dae35e9..38f47df 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h
@@ -31,9 +31,9 @@
const MaxUnpooling2DAttributes& attr);
MaxUnpooling(const OperationDef& definition,
const MaxUnpooling3DAttributes& attr);
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -43,9 +43,6 @@
MaxUnpooling& operator=(const MaxUnpooling&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int4 stride_;
int4 padding_;
int4 kernel_size_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc
index 334181b..e3fa023 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc
@@ -129,8 +129,7 @@
const double size_1 = total_size / size_0;
RETURN_IF_ERROR(args_.SetFloat("inv_multiplier_1", 1.0 / size_1));
RETURN_IF_ERROR(args_.SetFloat("inv_multiplier_2", 1.0 / size_0));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Mean::GetGridSize() const {
@@ -140,11 +139,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Mean::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Mean CreateMean(const OperationDef& definition) { return Mean(definition); }
} // namespace cl
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h
index 028e001..0552f16 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h
@@ -30,8 +30,12 @@
public:
Mean() = default;
explicit Mean(const OperationDef& definition) : GPUOperation(definition) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
+ absl::Status Tune(const TuningParameters& params) override {
+ return absl::OkStatus();
+ }
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -39,10 +43,6 @@
Mean& operator=(Mean&& operation);
Mean(const Mean&) = delete;
Mean& operator=(const Mean&) = delete;
-
- private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
Mean CreateMean(const OperationDef& definition);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc
index 8576475..ebd2809 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc
@@ -175,8 +175,7 @@
RETURN_IF_ERROR(args_.SetInt("prepended_y", attributes_.prepended.h));
RETURN_IF_ERROR(args_.SetInt("prepended_z", attributes_.prepended.c));
RETURN_IF_ERROR(args_.SetInt("prepended_w", attributes_.prepended.b));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Padding::GetGridSize() const {
@@ -186,16 +185,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Padding::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status Padding::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Padding CreatePadding(const OperationDef& definition,
const PadAttributes& attr) {
return Padding(definition, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h
index d87a3a8..12a83a4 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h
@@ -28,9 +28,9 @@
class Padding : public GPUOperation {
public:
Padding(const OperationDef& definition, const PadAttributes& attr);
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -40,9 +40,6 @@
Padding& operator=(const Padding&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
PadAttributes attributes_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc
index 966c655..6ba49e3 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc
@@ -399,8 +399,7 @@
if (output_indices_) {
RETURN_IF_ERROR(args_.SetObjectRef("dst_indices", dst_[1]));
}
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Pooling::GetGridSize() const {
@@ -410,16 +409,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Pooling::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status Pooling::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Pooling CreatePooling(const OperationDef& definition,
const Pooling2DAttributes& attr) {
return Pooling(definition, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h
index 67d290e..c0199d6 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h
@@ -31,9 +31,9 @@
public:
Pooling(const OperationDef& definition, const Pooling2DAttributes& attr);
Pooling(const OperationDef& definition, const Pooling3DAttributes& attr);
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -43,9 +43,6 @@
Pooling& operator=(const Pooling&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
int4 stride_;
int4 padding_;
int4 kernel_size_;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc
index 4cc5b12..a2e1092 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc
@@ -114,8 +114,7 @@
absl::Status Reshape::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Reshape::GetGridSize() const {
@@ -125,16 +124,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Reshape::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status Reshape::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Reshape CreateReshape(const OperationDef& definition) {
return Reshape(definition);
}
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h
index 8d95bbc..571a225 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h
@@ -28,9 +28,9 @@
class Reshape : public GPUOperation {
public:
explicit Reshape(const OperationDef& definition) : GPUOperation(definition) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -38,10 +38,6 @@
Reshape& operator=(Reshape&& operation);
Reshape(const Reshape&) = delete;
Reshape& operator=(const Reshape&) = delete;
-
- private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
Reshape CreateReshape(const OperationDef& definition);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc
index e4c47b7..1036dd8 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc
@@ -99,8 +99,7 @@
absl::Status Reshapex4::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Reshapex4::GetGridSize() const {
@@ -110,16 +109,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Reshapex4::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status Reshapex4::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Reshapex4 CreateReshapex4(const OperationDef& definition) {
return Reshapex4(definition);
}
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h
index f7c98ab..040b5b8 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h
@@ -30,9 +30,9 @@
public:
explicit Reshapex4(const OperationDef& definition)
: GPUOperation(definition) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -40,10 +40,6 @@
Reshapex4& operator=(Reshapex4&& operation);
Reshapex4(const Reshapex4&) = delete;
Reshapex4& operator=(const Reshapex4&) = delete;
-
- private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
// More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc
index a47fff9..33bb3b8 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc
@@ -227,8 +227,7 @@
RETURN_IF_ERROR(args_.SetFloat(
"scale_factor_y",
CalculateResizeScale(src_[0]->Height(), dst_[0]->Height(), attr_)));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Resize::GetGridSize() const {
@@ -238,16 +237,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Resize::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
-absl::Status Resize::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
Resize CreateResize(const OperationDef& definition,
const Resize2DAttributes& attr) {
return Resize(definition, attr);
@@ -292,8 +281,7 @@
RETURN_IF_ERROR(args_.SetFloat(
"scale_factor_z",
CalculateResizeScale(src_[0]->Depth(), dst_[0]->Depth(), attr_)));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Resize3D::GetGridSize() const {
@@ -303,16 +291,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Resize3D::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
-absl::Status Resize3D::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
Resize3D CreateResize3D(const OperationDef& definition,
const Resize3DAttributes& attr) {
return Resize3D(definition, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h
index 10fb414..899c85b 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h
@@ -27,9 +27,8 @@
class Resize : public GPUOperation {
public:
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -45,9 +44,6 @@
Resize(const OperationDef& definition, const Resize2DAttributes& attr)
: GPUOperation(definition), attr_(attr) {}
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
Resize2DAttributes attr_;
};
@@ -56,9 +52,8 @@
class Resize3D : public GPUOperation {
public:
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -74,9 +69,6 @@
Resize3D(const OperationDef& definition, const Resize3DAttributes& attr)
: GPUOperation(definition), attr_(attr) {}
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
Resize3DAttributes attr_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
index ea8671b..edc720d 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
@@ -91,8 +91,7 @@
absl::Status Softmax::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Softmax::GetGridSize() const {
@@ -102,16 +101,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Softmax::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status Softmax::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Softmax CreateSoftmax(const OperationDef& definition) {
return Softmax(definition);
}
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h
index 5f974ef..eac06ca 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h
@@ -30,9 +30,9 @@
public:
Softmax() = default;
explicit Softmax(const OperationDef& definition) : GPUOperation(definition) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -42,10 +42,6 @@
Softmax& operator=(const Softmax&) = delete;
friend Softmax CreateSoftmax();
-
- private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
Softmax CreateSoftmax(const OperationDef& definition);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc
index 28ebd8a..33dd285 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc
@@ -112,6 +112,7 @@
absl::Status Softmax1x1::Compile(const CreationContext& creation_context) {
std::string code = GetSoftmaxKernelCode(definition_, &args_);
std::string element_wise_code;
+ work_group_size_ = int3(32, 1, 1);
RETURN_IF_ERROR(
MergeOperations(linked_operations_, &args_, &element_wise_code));
RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(),
@@ -122,7 +123,7 @@
*creation_context.device, &kernel_);
}
-absl::Status Softmax1x1::AddToQueue(CLCommandQueue* queue) {
+absl::Status Softmax1x1::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
float4 mask = GetMaskForLastPlane(src_[0]->Channels());
@@ -132,12 +133,11 @@
RETURN_IF_ERROR(args_.SetFloat("mask_w", mask.w));
RETURN_IF_ERROR(
args_.SetInt("slices_x32", DivideRoundUp(src_[0]->Slices(), 32)));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
- return queue->DispatchImplicit(kernel_, {32, dst_[0]->Batch(), 1},
- {32, 1, 1});
+ return absl::OkStatus();
}
+int3 Softmax1x1::GetGridSize() const { return int3(32, dst_[0]->Batch(), 1); }
+
Softmax1x1 CreateSoftmax1x1(const OperationDef& definition) {
return Softmax1x1(definition);
}
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h
index d5ae037..f749a7b 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h
@@ -30,8 +30,11 @@
Softmax1x1() = default;
explicit Softmax1x1(const OperationDef& definition)
: GPUOperation(definition) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
-
+ absl::Status Tune(const TuningParameters& params) override {
+ return absl::OkStatus();
+ }
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc
index 6b5cc9f..37c3e09 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc
@@ -106,8 +106,7 @@
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
RETURN_IF_ERROR(args_.SetInt("block_size", attr_.block_size));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 SpaceToDepth::GetGridSize() const {
@@ -117,16 +116,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status SpaceToDepth::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status SpaceToDepth::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
SpaceToDepth CreateSpaceToDepth(const OperationDef& op_def,
const SpaceToDepthAttributes& attr) {
return SpaceToDepth(op_def, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h
index 6268920..99a0ca0 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h
@@ -30,8 +30,8 @@
public:
SpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr)
: GPUOperation(op_def), attr_(attr) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
SpaceToDepth(SpaceToDepth&& operation);
@@ -40,9 +40,6 @@
SpaceToDepth& operator=(const SpaceToDepth&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
SpaceToDepthAttributes attr_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc
index 904e7fc..443c4a4 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc
@@ -185,8 +185,7 @@
RETURN_IF_ERROR(args_.SetInt("stride_y", attributes_.strides.h));
RETURN_IF_ERROR(args_.SetInt("stride_z", attributes_.strides.c));
RETURN_IF_ERROR(args_.SetInt("stride_b", attributes_.strides.b));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 StridedSlice::GetGridSize() const {
@@ -196,16 +195,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status StridedSlice::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status StridedSlice::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
StridedSlice CreateStridedSlice(const OperationDef& definition,
const SliceAttributes& attr) {
return StridedSlice(definition, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h
index 3d88bd9..40005db 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h
@@ -27,9 +27,8 @@
class StridedSlice : public GPUOperation {
public:
StridedSlice(const OperationDef& definition, const SliceAttributes& attr);
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
-
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -39,9 +38,6 @@
StridedSlice& operator=(const StridedSlice&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
SliceAttributes attributes_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc
index bd5df56..eb62e1e 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc
@@ -130,8 +130,7 @@
absl::Status Transpose::BindArguments() {
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Transpose::GetGridSize() const {
@@ -141,16 +140,6 @@
return int3(grid_x, grid_y, grid_z);
}
-absl::Status Transpose::Tune(const TuningParameters& params) {
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_);
-}
-
-absl::Status Transpose::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
Transpose CreateTranspose(const OperationDef& definition,
const TransposeAttributes& attr) {
return Transpose(definition, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h
index 2c32fc4..36976d5 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h
@@ -28,8 +28,8 @@
public:
Transpose(const OperationDef& definition, const TransposeAttributes& attr)
: GPUOperation(definition), attr_(attr) {}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
- absl::Status Tune(const TuningParameters& params) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Compile(const CreationContext& creation_context) override;
// Move only
@@ -39,9 +39,6 @@
Transpose& operator=(const Transpose&) = delete;
private:
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
TransposeAttributes attr_;
};
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc
index a0f9238..d64b61a 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc
@@ -403,8 +403,7 @@
RETURN_IF_ERROR(args_.SetInt("padding_y", -padding_.prepended.h));
RETURN_IF_ERROR(args_.SetInt("tiles_total", tiles_total));
RETURN_IF_ERROR(args_.SetInt("tiles_x", tiles_x));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Winograd4x4To36::GetGridSize() const {
@@ -417,9 +416,8 @@
absl::Status Winograd4x4To36::Tune(const TuningParameters& params) {
switch (params.tuning_type) {
case TuningType::EXHAUSTIVE:
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(),
- &work_group_size_);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_);
case TuningType::FAST:
default:
work_group_size_ = SelectBestWorkGroup();
@@ -427,11 +425,6 @@
}
}
-absl::Status Winograd4x4To36::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
-
absl::Status CreateWinograd4x4To36(const CreationContext& creation_context,
const OperationDef& definition,
const Padding2D& padding,
@@ -506,8 +499,7 @@
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
const int tiles_x = DivideRoundUp(dst_[0]->Width(), 4);
RETURN_IF_ERROR(args_.SetInt("tiles_x", tiles_x));
- RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
- return args_.Bind(kernel_.kernel());
+ return absl::OkStatus();
}
int3 Winograd36To4x4::GetGridSize() const {
@@ -522,9 +514,8 @@
absl::Status Winograd36To4x4::Tune(const TuningParameters& params) {
switch (params.tuning_type) {
case TuningType::EXHAUSTIVE:
- RETURN_IF_ERROR(BindArguments());
- return GetBestWorkGroup(params, kernel_, GetGridSize(),
- &work_group_size_);
+ RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
+ return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_);
case TuningType::FAST:
default:
work_group_size_ = SelectBestWorkGroup();
@@ -532,10 +523,6 @@
}
}
-absl::Status Winograd36To4x4::AddToQueue(CLCommandQueue* queue) {
- RETURN_IF_ERROR(BindArguments());
- return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_);
-}
absl::Status CreateWinograd36To4x4(
const CreationContext& creation_context, const OperationDef& definition,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h
index 3f57342..7fe0fc0 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h
@@ -38,7 +38,8 @@
: GPUOperation(definition), padding_(padding) {
work_group_size_ = int3(128, 1, 1);
}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
@@ -58,9 +59,6 @@
// Must be called after kernel compilation
int3 SelectBestWorkGroup();
- absl::Status BindArguments();
- int3 GetGridSize() const;
-
Padding2D padding_;
};
@@ -76,7 +74,8 @@
: GPUOperation(definition) {
work_group_size_ = int3(128, 1, 1);
}
- absl::Status AddToQueue(CLCommandQueue* queue) override;
+ absl::Status BindArguments() override;
+ int3 GetGridSize() const override;
absl::Status Tune(const TuningParameters& params) override;
absl::Status Compile(const CreationContext& creation_context) override;
@@ -96,9 +95,6 @@
// Must be called after kernel compilation
int3 SelectBestWorkGroup();
-
- absl::Status BindArguments();
- int3 GetGridSize() const;
};
absl::Status CreateWinograd36To4x4(