blob: 65636fe6467a653982d26ffc41b5a059452ff248 [file] [log] [blame]
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h"
#include <map>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/workgroups/calculator.h"
namespace tflite {
namespace gpu {
namespace gl {
namespace {
// This code employs the results of the workgroup performance research
// (b/117291356).
// Describes the ideal convolution workgroup for a specific operation case.
// "Case" here means a specific "kernel + strides" combination for a specific
// operation type, not the sizes of input and output tensors, which can be any.
struct IdealByCase {
  // Returns true iff this entry is tuned for exactly the given operation
  // type, kernel size, and strides.
  bool ParamsAccepted(OperationType in_op_type, HW in_kernel,
                      HW in_strides) const {
    if (operation_type != in_op_type) return false;
    if (!(kernel == in_kernel)) return false;
    return strides == in_strides;
  }

  OperationType operation_type;
  HW kernel;
  HW strides;
  uint3 ideal_workgroup;  // workgroup size measured to be near-optimal
};
// Describes the ideal workgroup for a whole type of operation. Any
// configuration of an operation of this type is expected to perform within
// the top 10% on the particular GPU.
struct IdealByType {
  // Matches purely on the operation type; kernel/stride sizes are ignored.
  bool ParamsAccepted(OperationType in_op_type) const {
    return in_op_type == operation_type;
  }

  OperationType operation_type;
  uint3 ideal_workgroup;  // workgroup size measured to be near-optimal
};
// Describes ideal workgroups for the particular GPU model.
struct IdealWorkgroups {
// Per-operation-type defaults; consulted only after `by_case` fails to match.
std::vector<IdealByType> by_type;
// Fine-grained overrides keyed by (op type, kernel, strides); checked first.
std::vector<IdealByCase> by_case;
};
// List of ideal workgroups obtained from the research mentioned above.
//
// NOTE(review): the tables below are heap-allocated with `new` and never
// deleted -- presumably intentional, to avoid static destruction-order
// problems at process shutdown; confirm before changing.
// Ideal workgroups for Adreno 630.
std::vector<IdealByType>* kIdealByTypeAdreno630Ptr =
new std::vector<IdealByType>{
{OperationType::CONVOLUTION_2D, uint3(4, 8, 4)},
{OperationType::DEPTHWISE_CONVOLUTION, uint3(4, 4, 8)},
};
// Per-case overrides for Adreno 630: {op type, kernel HW, strides HW, wg}.
std::vector<IdealByCase>* kIdealByCaseAdreno630Ptr =
new std::vector<IdealByCase>{
{OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 8, 4)},
{OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 4, 4)},
{OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
uint3(8, 4, 4)},
{OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
uint3(4, 4, 4)},
};
// Ideal workgroups for Adreno 540.
std::vector<IdealByType>* kIdealByTypeAdreno540Ptr =
new std::vector<IdealByType>{
{OperationType::CONVOLUTION_2D, uint3(8, 2, 2)},
{OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 8, 2)},
};
// Per-case overrides for Adreno 540.
// NOTE(review): this table is defined but never referenced -- the map below
// wires ADRENO540 with an empty by_case list. Verify whether that is
// intentional.
std::vector<IdealByCase>* kIdealByCaseAdreno540Ptr =
new std::vector<IdealByCase>{
{OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 2, 8)},
{OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 2, 8)},
{OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
uint3(8, 4, 8)},
{OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
uint3(4, 4, 8)},
};
// Ideal workgroups for Adreno 510.
std::vector<IdealByType>* kIdealByTypeAdreno510Ptr =
new std::vector<IdealByType>{
{OperationType::CONVOLUTION_2D, uint3(8, 4, 4)},
{OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 4, 4)},
};
// Per-case overrides for Adreno 510.
std::vector<IdealByCase>* kIdealByCaseAdreno510Ptr =
new std::vector<IdealByCase>{
{OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 2, 8)},
{OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 2, 8)},
{OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
uint3(8, 4, 8)},
{OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
uint3(4, 4, 8)},
};
// Ideal workgroups for Adreno 509 (no per-case overrides measured).
std::vector<IdealByType>* kIdealByTypeAdreno509Ptr =
new std::vector<IdealByType>{
{OperationType::CONVOLUTION_2D, uint3(8, 4, 8)},
{OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 8, 2)},
};
// Ideal workgroups for Adreno 508, 506, 505, 418, 405 -- these models share
// a single table; the per-model pointers below are aliases of it.
std::vector<IdealByType>* kIdealByTypeAdreno508Ptr =
new std::vector<IdealByType>{
{OperationType::CONVOLUTION_2D, uint3(8, 4, 8)},
{OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 4, 8)},
};
std::vector<IdealByType>* kIdealByTypeAdreno506Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno505Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr;
// Puts all ideal workgroups from the list together, keyed by GPU model.
// The tables are copied by value into the map; an empty second element means
// no per-case overrides are consulted for that GPU.
// NOTE(review): ADRENO540 is wired with an empty by_case list even though
// kIdealByCaseAdreno540Ptr is defined above -- confirm whether that table
// was meant to be plugged in here.
const std::map<GpuModel, IdealWorkgroups>* kIdealWorkgroupsInfoPtr =
new std::map<GpuModel, IdealWorkgroups>{
{GpuModel::ADRENO630,
{*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}},
{GpuModel::ADRENO540, {*kIdealByTypeAdreno540Ptr, {}}},
{GpuModel::ADRENO510,
{*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}},
{GpuModel::ADRENO509, {*kIdealByTypeAdreno509Ptr, {}}},
{GpuModel::ADRENO508, {*kIdealByTypeAdreno508Ptr, {}}},
{GpuModel::ADRENO506, {*kIdealByTypeAdreno506Ptr, {}}},
{GpuModel::ADRENO505, {*kIdealByTypeAdreno505Ptr, {}}},
{GpuModel::ADRENO418, {*kIdealByTypeAdreno418Ptr, {}}},
{GpuModel::ADRENO405, {*kIdealByTypeAdreno405Ptr, {}}},
};
} // namespace
// Returns the researched ideal workgroup for (gpu_model, op_type, kernel,
// strides) if one is known; otherwise returns `default_wg`.
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
                                  HW kernel, HW strides, uint3 default_wg,
                                  OHWI workload) {
  // The research showed that the ideal-workgroup approach doesn't work well
  // with convolutions that have a small number of output channels or small
  // output height/width dimensions.
  if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg;

  // Single map lookup instead of count() + repeated at(); bail out for GPUs
  // that were not investigated.
  const auto it = kIdealWorkgroupsInfoPtr->find(gpu_model);
  if (it == kIdealWorkgroupsInfoPtr->end()) {
    return default_wg;
  }
  const IdealWorkgroups& ideals = it->second;

  // Try the specific operation cases first, because they are expected to be
  // better tuned than the default "by type" entries.
  for (const auto& specific_case : ideals.by_case) {
    if (specific_case.ParamsAccepted(op_type, kernel, strides)) {
      return specific_case.ideal_workgroup;
    }
  }

  // Fall back to the per-operation-type ideal workgroup.
  for (const auto& default_case : ideals.by_type) {
    if (default_case.ParamsAccepted(op_type)) {
      return default_case.ideal_workgroup;
    }
  }

  // No ideal workgroup found: use the default suggested by the operation.
  return default_wg;
}
// Convenience overload: same lookup, but with no caller-supplied default --
// kEmptyWorkgroupSize is returned when no ideal workgroup is known.
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
HW kernel, HW strides, OHWI workload) {
return GetIdealWorkgroupIfPossible(gpu_model, op_type, kernel, strides,
kEmptyWorkgroupSize, workload);
}
} // namespace gl
} // namespace gpu
} // namespace tflite