Allow longer fusion chains for Adreno in ThinPointwiseFuser.
PiperOrigin-RevId: 467909285
diff --git a/tensorflow/lite/delegates/gpu/common/tasks/special/depthwise_conv_plus_1x1_conv.cc b/tensorflow/lite/delegates/gpu/common/tasks/special/depthwise_conv_plus_1x1_conv.cc
index 8a7241c..a8826d3 100644
--- a/tensorflow/lite/delegates/gpu/common/tasks/special/depthwise_conv_plus_1x1_conv.cc
+++ b/tensorflow/lite/delegates/gpu/common/tasks/special/depthwise_conv_plus_1x1_conv.cc
@@ -62,6 +62,7 @@
bool IsNodeSupported(const GpuInfo& gpu_info, Node* node) const;
bool IsElementwiseNode(Node* node) const;
bool IsConvNode(Node* node) const;
+ bool IsDwConvNode(Node* node) const;
void AddNode(const GpuInfo& gpu_info, Node* node);
void AddElementwiseNode(ElementwiseDescriptor&& op_desc);
void AddConvNode(const GpuInfo& gpu_info,
@@ -73,7 +74,6 @@
void AddConvData(const Convolution2DAttributes& conv_attr);
void AddDepthwiseConvData(const DepthwiseConvolution2DAttributes& dw_attr);
void CreateConstantsGpuBuffer(const GpuInfo& gpu_info);
- bool HasConvNode() const;
std::vector<Node*> nodes_;
OperationDef op_def_;
Arguments args_;
@@ -81,6 +81,7 @@
std::vector<std::string> outputs_;
std::vector<float> gpu_data_;
int weights_counter_ = 0;
+ int buffer_size_ = 0;
std::string op_name_;
int link_counter_ = 0;
uint64_t flops_ = 0;
@@ -270,6 +271,24 @@
if (!good_conv) {
return false;
}
+ if (gpu_info.IsAdreno() && gpu_info.IsApiOpenCl()) {
+ int conv_src_ch_aligned = AlignByN(conv_attr->weights.shape.i, 4);
+ int conv_dst_ch_aligned = AlignByN(conv_attr->weights.shape.o, 4);
+ int conv_weights_count =
+ conv_dst_ch_aligned + conv_src_ch_aligned * conv_dst_ch_aligned;
+
+ DataType data_type = op_def_.precision == CalculationsPrecision::F32
+ ? DataType::FLOAT32
+ : DataType::FLOAT16;
+ int weights_size = conv_weights_count * SizeOf(data_type);
+ if (convs_count_ >= 3 || buffer_size_ + weights_size > 1024 * 3) {
+ return false;
+ }
+ } else {
+ if (convs_count_ >= 1) {
+ return false;
+ }
+ }
if (gpu_info.IsApple()) {
if (op_def_.precision == CalculationsPrecision::F16) {
return conv_shape.o <= 16 && conv_shape.i * conv_shape.o <= 16 * 16;
@@ -295,26 +314,42 @@
}
bool ThinPointwiseFuser::ReserveNode(const GpuInfo& gpu_info, Node* node) {
- if (convs_count_ >= 1 || !IsNodeSupported(gpu_info, node)) {
+ if (!IsNodeSupported(gpu_info, node)) {  // convs_count_ is no longer tested here; presumably IsNodeSupported now enforces the limit - confirm.
return false;
}
nodes_.push_back(node);
if (IsConvNode(node)) {
convs_count_++;
+ Convolution2DAttributes* conv_attr =  // NOTE(review): any_cast result is dereferenced below without a null check.
+ absl::any_cast<Convolution2DAttributes>(&node->operation.attributes);
+
+ int conv_src_ch_aligned = AlignByN(conv_attr->weights.shape.i, 4);  // Presumably rounds channels up to a multiple of 4 - confirm.
+ int conv_dst_ch_aligned = AlignByN(conv_attr->weights.shape.o, 4);
+ int conv_weights_count =  // Element count: one aligned-dst term (presumably bias) plus aligned src * aligned dst.
+ conv_dst_ch_aligned + conv_src_ch_aligned * conv_dst_ch_aligned;
+
+ DataType data_type = op_def_.precision == CalculationsPrecision::F32  // Only F32 precision maps to FLOAT32; every other precision is sized as FLOAT16.
+ ? DataType::FLOAT32
+ : DataType::FLOAT16;
+ buffer_size_ += conv_weights_count * SizeOf(data_type);  // Running total that the Adreno/OpenCL check compares against 1024 * 3.
+ }
+ if (IsDwConvNode(node)) {  // Depthwise constants are accumulated into the same running total.
+ DepthwiseConvolution2DAttributes* dw_attr =  // NOTE(review): any_cast result is dereferenced below without a null check.
+ absl::any_cast<DepthwiseConvolution2DAttributes>(
+ &node->operation.attributes);
+
+ int dw_dst_ch_aligned = AlignByN(dw_attr->weights.shape.i, 4);  // NOTE(review): uses shape.i only; assumes shape.o (depth multiplier) is 1 - confirm.
+ int dw_weights_count = dw_dst_ch_aligned + dw_dst_ch_aligned *  // Element count: one aligned-channel term plus aligned channels * kernel h * w.
+ dw_attr->weights.shape.h *
+ dw_attr->weights.shape.w;
+ DataType data_type = op_def_.precision == CalculationsPrecision::F32  // Same element-type choice as the conv branch above.
+ ? DataType::FLOAT32
+ : DataType::FLOAT16;
+ buffer_size_ += dw_weights_count * SizeOf(data_type);
}
return true;
}
-bool ThinPointwiseFuser::HasConvNode() const {
- for (auto& node : nodes_) {
- if (OperationTypeFromString(node->operation.type) ==
- OperationType::CONVOLUTION_2D) {
- return true;
- }
- }
- return false;
-}
-
void ThinPointwiseFuser::AddNode(const GpuInfo& gpu_info, Node* node) {
auto op_type = OperationTypeFromString(node->operation.type);
if (op_type == OperationType::RELU) {
@@ -347,6 +382,11 @@
return op_type == OperationType::CONVOLUTION_2D;
}
+bool ThinPointwiseFuser::IsDwConvNode(Node* node) const {  // Returns true when the node's operation type is DEPTHWISE_CONVOLUTION.
+ auto op_type = OperationTypeFromString(node->operation.type);  // Maps the node's operation.type to an OperationType value.
+ return op_type == OperationType::DEPTHWISE_CONVOLUTION;
+}
+
void ThinPointwiseFuser::AddDepthwiseConvNode(
const GpuInfo& gpu_info, const DepthwiseConvolution2DAttributes& attr) {
AddDepthwiseConvData(attr);