Device and DeviceInfo separated.
DeviceInfo doesn't have OpenCL API calls/elements.
PiperOrigin-RevId: 324719724
Change-Id: I0ffb6eaf6cd7c1edc77e14b28528710aced34519
diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD
index 2344a7c..ebfb2cf 100644
--- a/tensorflow/lite/delegates/gpu/cl/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/BUILD
@@ -166,6 +166,7 @@
srcs = ["cl_device.cc"],
hdrs = ["cl_device.h"],
deps = [
+ ":device_info",
":opencl_wrapper",
":util",
"//tensorflow/lite/delegates/gpu/common:status",
@@ -252,6 +253,15 @@
)
cc_library(
+ name = "device_info",
+ srcs = ["device_info.cc"],
+ hdrs = ["device_info.h"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_library(
name = "egl_sync",
srcs = ["egl_sync.cc"],
hdrs = ["egl_sync.h"],
diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.cc b/tensorflow/lite/delegates/gpu/cl/arguments.cc
index 7924109..ed72bcc 100644
--- a/tensorflow/lite/delegates/gpu/cl/arguments.cc
+++ b/tensorflow/lite/delegates/gpu/cl/arguments.cc
@@ -690,7 +690,7 @@
void Arguments::ResolveArgsPass(const DeviceInfo& device_info,
std::string* code) {
- bool use_f32_for_half_arguments = device_info.vendor == Vendor::POWERVR;
+ bool use_f32_for_half_arguments = device_info.IsPowerVR();
size_t position = 0;
size_t next_position = code->find(kArgsPrefix);
while (next_position != std::string::npos) {
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc
index f7501da..a1795b1 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc
+++ b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc
@@ -216,16 +216,14 @@
const CLKernel& kernel, const DeviceInfo& device_info, const int3& grid,
const std::vector<int3>& work_group_sizes, int* index) {
// Some Adreno 3xx can have wrong numbers for some events
- const bool possible_bug_with_events =
- device_info.vendor == Vendor::QUALCOMM &&
- device_info.adreno_info.gpu_version < 400;
+ const bool possible_bug_with_events = device_info.IsAdreno3xx();
events_.resize(work_group_sizes.size());
for (int i = 0; i < work_group_sizes.size(); ++i) {
RETURN_IF_ERROR(CLCommandQueue::DispatchImplicit(
kernel, grid, work_group_sizes[i], &events_[i]));
// reducing the speed of memory leak on Mali for some kernels
- if (device_info.vendor == Vendor::MALI && i % 8 == 7) {
+ if (device_info.IsMali() && i % 8 == 7) {
events_[i - 7].Wait();
}
if (possible_bug_with_events) {
@@ -237,7 +235,7 @@
RETURN_IF_ERROR(WaitForCompletion());
// To release memory of some kernel pool on Mali.
- if (device_info.vendor == Vendor::MALI) {
+ if (device_info.IsMali()) {
RETURN_IF_ERROR(kernel.ReInit());
}
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc
index f4f1f1c..b93bfb2 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc
+++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc
@@ -128,24 +128,24 @@
std::transform(v_name.begin(), v_name.end(), v_name.begin(), ::tolower);
if (d_name.find("qualcomm") != std::string::npos ||
v_name.find("qualcomm") != std::string::npos) {
- return Vendor::QUALCOMM;
+ return Vendor::kQualcomm;
} else if (d_name.find("mali") != std::string::npos ||
v_name.find("mali") != std::string::npos) {
- return Vendor::MALI;
+ return Vendor::kMali;
} else if (d_name.find("power") != std::string::npos ||
v_name.find("power") != std::string::npos) {
- return Vendor::POWERVR;
+ return Vendor::kPowerVR;
} else if (d_name.find("nvidia") != std::string::npos ||
v_name.find("nvidia") != std::string::npos) {
- return Vendor::NVIDIA;
+ return Vendor::kNvidia;
} else if (d_name.find("advanced micro devices") != std::string::npos ||
v_name.find("advanced micro devices") != std::string::npos) {
- return Vendor::AMD;
+ return Vendor::kAMD;
} else if (d_name.find("intel") != std::string::npos ||
v_name.find("intel") != std::string::npos) {
- return Vendor::INTEL;
+ return Vendor::kIntel;
} else {
- return Vendor::UNKNOWN;
+ return Vendor::kUnknown;
}
}
@@ -156,316 +156,99 @@
}
} // namespace
-// There is no rule for gpu version encoding, but we found these samples:
-// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2
-// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2
-// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3
-// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8
-// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9
-// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge
-// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4)
-// After the number string ends.
-// It is assumed that the <vendor-specific information> for Adreno GPUs has
-// the following format:
-// <text?><space?>Adreno(TM)<space><text?><version>
-// Returns -1 if vendor-specific information cannot be parsed
-int GetAdrenoGPUVersion(const std::string& gpu_version) {
- const std::string gpu = absl::AsciiStrToLower(gpu_version);
- const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' ');
- int i = 0;
- for (; i < words.size(); ++i) {
- if (words[i].find("adreno") != words[i].npos) {
- break;
- }
- }
- i += 1;
- for (; i < words.size(); ++i) {
- int number;
- bool is_number = absl::SimpleAtoi(words[i], &number);
- // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx
- if (is_number && number >= 300) {
- return number;
- }
- }
- return -1;
-}
-
-MaliGPU GetMaliGPUVersion(const std::string& device_name) {
- const std::map<std::string, MaliGPU> kMapping = {
- {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624},
- {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678},
- {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820},
- {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880},
- {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71},
- {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76},
- {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77},
- };
- for (const auto& v : kMapping) {
- if (device_name.find(v.first) != std::string::npos) {
- return v.second;
- }
- }
- return MaliGPU::UNKNOWN;
-}
-
-std::string VendorToString(Vendor v) {
- switch (v) {
- case Vendor::QUALCOMM:
- return "Qualcomm";
- case Vendor::MALI:
- return "Mali";
- case Vendor::POWERVR:
- return "PowerVR";
- case Vendor::NVIDIA:
- return "NVIDIA";
- case Vendor::AMD:
- return "AMD";
- case Vendor::INTEL:
- return "Intel";
- case Vendor::UNKNOWN:
- return "unknown vendor";
- }
-}
-
-std::string OpenCLVersionToString(OpenCLVersion version) {
- switch (version) {
- case OpenCLVersion::CL_1_0:
- return "1.0";
- case OpenCLVersion::CL_1_1:
- return "1.1";
- case OpenCLVersion::CL_1_2:
- return "1.2";
- case OpenCLVersion::CL_2_0:
- return "2.0";
- case OpenCLVersion::CL_2_1:
- return "2.1";
- case OpenCLVersion::CL_2_2:
- return "2.2";
- case OpenCLVersion::CL_3_0:
- return "3.0";
- }
-}
-
-AdrenoInfo::AdrenoInfo(const std::string& device_version)
- : gpu_version(GetAdrenoGPUVersion(device_version)) {}
-
-int AdrenoInfo::GetMaximumWavesCount() const {
- if (gpu_version < 400) {
- return -1; // Adreno 3xx does not support it currently
- } else if (gpu_version >= 400 && gpu_version < 500) {
- return -1; // Adreno 4xx does not support it currently
- } else if (gpu_version >= 500 && gpu_version < 600) {
- return -1; // Adreno 5xx does not support it currently
- } else if (gpu_version >= 600 && gpu_version < 700) {
- return gpu_version == 640 ? 30 : 16;
- } else {
- return -1; // Adreno 7xx and higher does not exist yet
- }
-}
-
-int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
- if (gpu_version < 400) {
- return -1; // Adreno 3xx does not support it currently
- } else if (gpu_version >= 400 && gpu_version < 500) {
- return -1; // Adreno 4xx does not support it currently
- } else if (gpu_version >= 500 && gpu_version < 600) {
- return -1; // Adreno 5xx does not support it currently
- } else if (gpu_version >= 600 && gpu_version < 700) {
- return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16;
- } else {
- return -1; // Adreno 7xx and higher does not exist yet
- }
-}
-
-int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
- bool full_wave) const {
- const int register_usage_per_wave =
- GetWaveSize(full_wave) * register_footprint_per_tread;
- const int possible_waves_count =
- GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
- return std::min(possible_waves_count, GetMaximumWavesCount());
-}
-
-int AdrenoInfo::GetWaveSize(bool full_wave) const {
- if (gpu_version < 400) {
- return -1; // Adreno 3xx does not support it currently
- } else if (gpu_version < 600) {
- return full_wave ? 64 : 32;
- } else {
- return full_wave ? 128 : 64;
- }
-}
-
-MaliInfo::MaliInfo(const std::string& device_name)
- : gpu_version(GetMaliGPUVersion(device_name)) {}
-
-bool MaliInfo::IsMaliT6xx() const {
- return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 ||
- gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 ||
- gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678;
-}
-
-bool MaliInfo::IsMaliT7xx() const {
- return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760;
-}
-
-bool MaliInfo::IsMaliT8xx() const {
- return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 ||
- gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880;
-}
-
-bool MaliInfo::IsMidgard() const {
- return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx();
-}
-
-bool MaliInfo::IsBifrostGen1() const {
- return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 ||
- gpu_version == MaliGPU::G71;
-}
-
-bool MaliInfo::IsBifrostGen2() const {
- return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72;
-}
-
-bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; }
-
-bool MaliInfo::IsBifrost() const {
- return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3();
-}
-
-bool MaliInfo::IsValhall() const {
- return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77;
-}
-
-DeviceInfo::DeviceInfo(cl_device_id id) {
+DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) {
+ DeviceInfo info;
const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
const auto opencl_c_version =
GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
- vendor = ParseVendor(device_name, vendor_name);
- if (vendor == Vendor::QUALCOMM) {
- adreno_info = AdrenoInfo(opencl_c_version);
- } else if (vendor == Vendor::MALI) {
- mali_info = MaliInfo(device_name);
+ info.vendor = ParseVendor(device_name, vendor_name);
+ if (info.vendor == Vendor::kQualcomm) {
+ info.adreno_info = AdrenoInfo(opencl_c_version);
+ } else if (info.vendor == Vendor::kMali) {
+ info.mali_info = MaliInfo(device_name);
}
- cl_version = ParseCLVersion(opencl_c_version);
- extensions =
+ info.cl_version = ParseCLVersion(opencl_c_version);
+ info.extensions =
absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
- supports_fp16 = false;
- supports_image3d_writes = false;
- for (const auto& ext : extensions) {
+ info.supports_fp16 = false;
+ info.supports_image3d_writes = false;
+ for (const auto& ext : info.extensions) {
if (ext == "cl_khr_fp16") {
- supports_fp16 = true;
+ info.supports_fp16 = true;
}
if (ext == "cl_khr_3d_image_writes") {
- supports_image3d_writes = true;
+ info.supports_image3d_writes = true;
}
}
- f32_config =
+ cl_device_fp_config f32_config =
GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
- supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
+ info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
- if (supports_fp16) {
+ if (info.supports_fp16) {
+ cl_device_fp_config f16_config;
auto status = GetDeviceInfo<cl_device_fp_config>(
id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
// AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
- if (status.ok() && vendor != Vendor::AMD) {
- supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
+ if (status.ok() && info.vendor != Vendor::kAMD) {
+ info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
} else { // happens on PowerVR
f16_config = f32_config;
- supports_fp16_rtn = supports_fp32_rtn;
+ info.supports_fp16_rtn = info.supports_fp32_rtn;
}
} else {
- f16_config = 0;
- supports_fp16_rtn = false;
+ info.supports_fp16_rtn = false;
}
- if (vendor == Vendor::POWERVR && !supports_fp16) {
+ if (info.vendor == Vendor::kPowerVR && !info.supports_fp16) {
// PowerVR doesn't have full support of fp16 and so doesn't list this
// extension. But it can support fp16 in MADs and as buffers/textures types,
// so we will use it.
- supports_fp16 = true;
- f16_config = f32_config;
- supports_fp16_rtn = supports_fp32_rtn;
+ info.supports_fp16 = true;
+ info.supports_fp16_rtn = info.supports_fp32_rtn;
}
- if (!supports_image3d_writes &&
- ((vendor == Vendor::QUALCOMM &&
- IsGPUVersionInRange(adreno_info.gpu_version, 400, 500)) ||
- vendor == Vendor::NVIDIA)) {
+ if (!info.supports_image3d_writes &&
+ ((info.vendor == Vendor::kQualcomm &&
+ IsGPUVersionInRange(info.adreno_info.gpu_version, 400, 500)) ||
+ info.vendor == Vendor::kNvidia)) {
// in local tests Adreno 430 can write in image 3d, at least on small sizes,
// but it doesn't have cl_khr_3d_image_writes in list of available
// extensions
// The same for NVidia
- supports_image3d_writes = true;
+ info.supports_image3d_writes = true;
}
- compute_units_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
- image2d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
- image2d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
- buffer_max_size = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
- if (cl_version >= OpenCLVersion::CL_1_2) {
- image_buffer_max_size =
+ info.compute_units_count =
+ GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
+ info.image2d_max_width =
+ GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
+ info.image2d_max_height =
+ GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
+ info.buffer_max_size =
+ GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
+ if (info.cl_version >= OpenCLVersion::CL_1_2) {
+ info.image_buffer_max_size =
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
- image_array_max_layers =
+ info.image_array_max_layers =
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
}
- image3d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
- image3d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
- image3d_max_depth = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
+ info.image3d_max_width =
+ GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
+ info.image3d_max_height =
+ GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_HEIGHT);
+ info.image3d_max_depth =
+ GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
+ int3 max_work_group_sizes;
GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
+ info.max_work_group_size_x = max_work_group_sizes.x;
+ info.max_work_group_size_y = max_work_group_sizes.y;
+ info.max_work_group_size_z = max_work_group_sizes.z;
+ return info;
}
-bool DeviceInfo::SupportsTextureArray() const {
- return cl_version >= OpenCLVersion::CL_1_2;
-}
-
-bool DeviceInfo::SupportsImageBuffer() const {
- return cl_version >= OpenCLVersion::CL_1_2;
-}
-
-bool DeviceInfo::SupportsImage3D() const {
- if (vendor == Vendor::MALI) {
- // On Mali T880 read_imageh doesn't compile with image3d_t
- return false;
- }
- return supports_image3d_writes;
-}
-
-bool DeviceInfo::IsAdreno() const { return vendor == Vendor::QUALCOMM; }
-
-bool DeviceInfo::IsAdreno3xx() const {
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400);
-}
-
-bool DeviceInfo::IsAdreno4xx() const {
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500);
-}
-
-bool DeviceInfo::IsAdreno5xx() const {
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600);
-}
-
-bool DeviceInfo::IsAdreno6xx() const {
- return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700);
-}
-
-bool DeviceInfo::IsAdreno6xxOrHigher() const {
- return IsAdreno() && adreno_info.gpu_version >= 600;
-}
-
-bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::POWERVR; }
-
-bool DeviceInfo::IsNvidia() const { return vendor == Vendor::NVIDIA; }
-
-bool DeviceInfo::IsMali() const { return vendor == Vendor::MALI; }
-
-bool DeviceInfo::IsAMD() const { return vendor == Vendor::AMD; }
-
-bool DeviceInfo::IsIntel() const { return vendor == Vendor::INTEL; }
-
CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
- : id_(id), platform_id_(platform_id), info_(id) {}
+ : id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {}
CLDevice::CLDevice(const CLDevice& device)
: id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {}
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h
index 217111c..7e4792b 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_device.h
+++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h
@@ -19,6 +19,7 @@
#include <string>
#include <vector>
+#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -28,139 +29,6 @@
namespace gpu {
namespace cl {
-enum class Vendor { QUALCOMM, MALI, POWERVR, NVIDIA, AMD, INTEL, UNKNOWN };
-std::string VendorToString(Vendor v);
-
-enum class OpenCLVersion {
- CL_1_0,
- CL_1_1,
- CL_1_2,
- CL_2_0,
- CL_2_1,
- CL_2_2,
- CL_3_0
-};
-std::string OpenCLVersionToString(OpenCLVersion version);
-
-// for use only in cl_device.cc, but putted here to make tests
-int GetAdrenoGPUVersion(const std::string& gpu_version);
-
-struct AdrenoInfo {
- AdrenoInfo() = default;
- explicit AdrenoInfo(const std::string& device_version);
- int gpu_version = -1; // can be, for example, 405/430/540/530/630 etc.
-
- // This function returns some not very documented physical parameter of
- // Adreno6xx GPU.
- // We obtained it using Snapdragon Profiler.
- int GetMaximumWavesCount() const;
-
- // returns amount of register memory per CU(Compute Unit) in bytes.
- int GetRegisterMemorySizePerComputeUnit() const;
-
- // returns maximum possible amount of waves based on register usage.
- int GetMaximumWavesCount(int register_footprint_per_tread,
- bool full_wave = true) const;
-
- int GetWaveSize(bool full_wave) const;
-
- // Not supported on some Adreno devices with specific driver version.
- // b/131099086
- bool support_one_layer_texture_array = true;
-};
-
-enum class MaliGPU {
- T604,
- T622,
- T624,
- T628,
- T658,
- T678,
- T720,
- T760,
- T820,
- T830,
- T860,
- T880,
- G31,
- G51,
- G71,
- G52,
- G72,
- G76,
- G57,
- G77,
- UNKNOWN
-};
-
-struct MaliInfo {
- MaliInfo() = default;
- explicit MaliInfo(const std::string& device_name);
- MaliGPU gpu_version;
-
- bool IsMaliT6xx() const;
- bool IsMaliT7xx() const;
- bool IsMaliT8xx() const;
- bool IsMidgard() const;
- bool IsBifrostGen1() const;
- bool IsBifrostGen2() const;
- bool IsBifrostGen3() const;
- bool IsBifrost() const;
- bool IsValhall() const;
-};
-
-struct DeviceInfo {
- DeviceInfo() = default;
- explicit DeviceInfo(cl_device_id id);
-
- bool IsAdreno() const;
- bool IsAdreno3xx() const;
- bool IsAdreno4xx() const;
- bool IsAdreno5xx() const;
- bool IsAdreno6xx() const;
- bool IsAdreno6xxOrHigher() const;
- bool IsPowerVR() const;
- bool IsNvidia() const;
- bool IsMali() const;
- bool IsAMD() const;
- bool IsIntel() const;
-
- bool SupportsTextureArray() const;
- bool SupportsImageBuffer() const;
- bool SupportsImage3D() const;
-
- std::vector<std::string> extensions;
- bool supports_fp16;
- bool supports_image3d_writes;
- Vendor vendor;
- OpenCLVersion cl_version;
- int compute_units_count;
- uint64_t buffer_max_size;
- uint64_t image2d_max_width;
- uint64_t image2d_max_height;
- uint64_t image_buffer_max_size;
- uint64_t image_array_max_layers;
- uint64_t image3d_max_width;
- uint64_t image3d_max_height;
- uint64_t image3d_max_depth;
- int3 max_work_group_sizes;
-
- cl_device_fp_config f32_config;
- // valid only with cl_khr_fp16
- cl_device_fp_config f16_config;
-
- // rtn is ROUND_TO_NEAREST
- // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
- // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
- // Mali from T6xx supports rtn
- // PowerVR supports only rtz
- bool supports_fp32_rtn;
- bool supports_fp16_rtn;
-
- AdrenoInfo adreno_info;
- MaliInfo mali_info;
-};
-
// A wrapper around opencl device id
class CLDevice {
public:
diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc
new file mode 100644
index 0000000..7e0acb8
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/cl/device_info.cc
@@ -0,0 +1,268 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
+
+namespace tflite {
+namespace gpu {
+namespace cl {
+namespace {
+// Returns true if gpu_version lies in the half-open range
+// [min_version, max_version): min_version is included, max_version excluded.
+bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) {
+ return gpu_version >= min_version && gpu_version < max_version;
+}
+
+MaliGPU GetMaliGPUVersion(const std::string& device_name) {
+ const std::map<std::string, MaliGPU> kMapping = {
+ {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624},
+ {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678},
+ {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820},
+ {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880},
+ {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71},
+ {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76},
+ {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77},
+ };
+ for (const auto& v : kMapping) {
+ if (device_name.find(v.first) != std::string::npos) {
+ return v.second;
+ }
+ }
+ return MaliGPU::UNKNOWN;
+}
+
+} // namespace
+
+// There is no rule for gpu version encoding, but we found these samples:
+// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2
+// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2
+// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3
+// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8
+// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9
+// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge
+// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4)
+// In these samples the version string ends right after the GPU number.
+// It is assumed that the <vendor-specific information> for Adreno GPUs has
+// the following format:
+// <text?><space?>Adreno(TM)<space><text?><version>
+// Returns -1 if vendor-specific information cannot be parsed
+int GetAdrenoGPUVersion(const std::string& gpu_version) {
+ const std::string gpu = absl::AsciiStrToLower(gpu_version);
+ const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' ');
+ int i = 0;
+ for (; i < words.size(); ++i) {
+ if (words[i].find("adreno") != words[i].npos) {
+ break;
+ }
+ }
+ i += 1;
+ for (; i < words.size(); ++i) {
+ int number;
+ bool is_number = absl::SimpleAtoi(words[i], &number);
+ // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx
+ if (is_number && number >= 300) {
+ return number;
+ }
+ }
+ return -1;
+}
+
+std::string VendorToString(Vendor v) {
+ switch (v) {
+ case Vendor::kQualcomm:
+ return "Qualcomm";
+ case Vendor::kMali:
+ return "Mali";
+ case Vendor::kPowerVR:
+ return "PowerVR";
+ case Vendor::kNvidia:
+ return "NVIDIA";
+ case Vendor::kAMD:
+ return "AMD";
+ case Vendor::kIntel:
+ return "Intel";
+ case Vendor::kUnknown:
+ return "unknown vendor";
+ }
+}
+
+std::string OpenCLVersionToString(OpenCLVersion version) {
+ switch (version) {
+ case OpenCLVersion::CL_1_0:
+ return "1.0";
+ case OpenCLVersion::CL_1_1:
+ return "1.1";
+ case OpenCLVersion::CL_1_2:
+ return "1.2";
+ case OpenCLVersion::CL_2_0:
+ return "2.0";
+ case OpenCLVersion::CL_2_1:
+ return "2.1";
+ case OpenCLVersion::CL_2_2:
+ return "2.2";
+ case OpenCLVersion::CL_3_0:
+ return "3.0";
+ }
+}
+
+AdrenoInfo::AdrenoInfo(const std::string& device_version)
+ : gpu_version(GetAdrenoGPUVersion(device_version)) {}
+
+int AdrenoInfo::GetMaximumWavesCount() const {
+ if (gpu_version < 400) {
+ return -1; // Adreno 3xx does not support it currently
+ } else if (gpu_version >= 400 && gpu_version < 500) {
+ return -1; // Adreno 4xx does not support it currently
+ } else if (gpu_version >= 500 && gpu_version < 600) {
+ return -1; // Adreno 5xx does not support it currently
+ } else if (gpu_version >= 600 && gpu_version < 700) {
+ return gpu_version == 640 ? 30 : 16;
+ } else {
+ return -1; // Adreno 7xx and higher does not exist yet
+ }
+}
+
+int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
+ if (gpu_version < 400) {
+ return -1; // Adreno 3xx does not support it currently
+ } else if (gpu_version >= 400 && gpu_version < 500) {
+ return -1; // Adreno 4xx does not support it currently
+ } else if (gpu_version >= 500 && gpu_version < 600) {
+ return -1; // Adreno 5xx does not support it currently
+ } else if (gpu_version >= 600 && gpu_version < 700) {
+ return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16;
+ } else {
+ return -1; // Adreno 7xx and higher does not exist yet
+ }
+}
+
+int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
+ bool full_wave) const {
+ const int register_usage_per_wave =
+ GetWaveSize(full_wave) * register_footprint_per_tread;
+ const int possible_waves_count =
+ GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
+ return std::min(possible_waves_count, GetMaximumWavesCount());
+}
+
+int AdrenoInfo::GetWaveSize(bool full_wave) const {
+ if (gpu_version < 400) {
+ return -1; // Adreno 3xx does not support it currently
+ } else if (gpu_version < 600) {
+ return full_wave ? 64 : 32;
+ } else {
+ return full_wave ? 128 : 64;
+ }
+}
+
+MaliInfo::MaliInfo(const std::string& device_name)
+ : gpu_version(GetMaliGPUVersion(device_name)) {}
+
+bool MaliInfo::IsMaliT6xx() const {
+ return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 ||
+ gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 ||
+ gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678;
+}
+
+bool MaliInfo::IsMaliT7xx() const {
+ return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760;
+}
+
+bool MaliInfo::IsMaliT8xx() const {
+ return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 ||
+ gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880;
+}
+
+bool MaliInfo::IsMidgard() const {
+ return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx();
+}
+
+bool MaliInfo::IsBifrostGen1() const {
+ return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 ||
+ gpu_version == MaliGPU::G71;
+}
+
+bool MaliInfo::IsBifrostGen2() const {
+ return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72;
+}
+
+bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; }
+
+bool MaliInfo::IsBifrost() const {
+ return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3();
+}
+
+bool MaliInfo::IsValhall() const {
+ return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77;
+}
+
+bool DeviceInfo::SupportsTextureArray() const {
+ return cl_version >= OpenCLVersion::CL_1_2;
+}
+
+bool DeviceInfo::SupportsImageBuffer() const {
+ return cl_version >= OpenCLVersion::CL_1_2;
+}
+
+bool DeviceInfo::SupportsImage3D() const {
+ if (vendor == Vendor::kMali) {
+ // On Mali T880 read_imageh doesn't compile with image3d_t
+ return false;
+ }
+ return supports_image3d_writes;
+}
+
+bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; }
+
+bool DeviceInfo::IsAdreno3xx() const {
+ return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400);
+}
+
+bool DeviceInfo::IsAdreno4xx() const {
+ return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500);
+}
+
+bool DeviceInfo::IsAdreno5xx() const {
+ return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600);
+}
+
+bool DeviceInfo::IsAdreno6xx() const {
+ return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700);
+}
+
+bool DeviceInfo::IsAdreno6xxOrHigher() const {
+ return IsAdreno() && adreno_info.gpu_version >= 600;
+}
+
+bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::kPowerVR; }
+
+bool DeviceInfo::IsNvidia() const { return vendor == Vendor::kNvidia; }
+
+bool DeviceInfo::IsMali() const { return vendor == Vendor::kMali; }
+
+bool DeviceInfo::IsAMD() const { return vendor == Vendor::kAMD; }
+
+bool DeviceInfo::IsIntel() const { return vendor == Vendor::kIntel; }
+
+} // namespace cl
+} // namespace gpu
+} // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h
new file mode 100644
index 0000000..b13fe3d
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/cl/device_info.h
@@ -0,0 +1,168 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
+#include <cstdint>
+#include <string>
+#include <vector>
+
+// For use only in device_info.cc; declared here so that tests can call it.
+int GetAdrenoGPUVersion(const std::string& gpu_version);
+
+namespace tflite {
+namespace gpu {
+namespace cl {
+
+enum class Vendor {
+ kQualcomm, // Adreno GPUs; tested by DeviceInfo::IsAdreno()
+ kMali,
+ kPowerVR,
+ kNvidia,
+ kAMD,
+ kIntel,
+ kUnknown
+};
+std::string VendorToString(Vendor v);
+
+enum class OpenCLVersion { // ascending order: DeviceInfo compares with >=
+ CL_1_0,
+ CL_1_1,
+ CL_1_2, // DeviceInfo requires >= this for texture arrays / image buffers
+ CL_2_0,
+ CL_2_1,
+ CL_2_2,
+ CL_3_0
+};
+std::string OpenCLVersionToString(OpenCLVersion version);
+
+struct AdrenoInfo {
+ AdrenoInfo() = default;
+ explicit AdrenoInfo(const std::string& device_version);
+ int gpu_version = -1; // e.g. 405/430/530/540/630; -1 by default
+
+ // Returns a physical parameter of Adreno 6xx GPUs that is not well
+ // documented.
+ // The values were obtained with Snapdragon Profiler.
+ int GetMaximumWavesCount() const;
+
+ // Returns the amount of register memory per CU (Compute Unit), in bytes.
+ int GetRegisterMemorySizePerComputeUnit() const;
+
+ // Returns the maximum possible number of waves based on register usage.
+ int GetMaximumWavesCount(int register_footprint_per_tread,
+ bool full_wave = true) const;
+
+ int GetWaveSize(bool full_wave) const;
+
+ // One-layer texture arrays are not supported on some Adreno devices with
+ // specific driver versions (b/131099086).
+ bool support_one_layer_texture_array = true;
+};
+
+enum class MaliGPU {
+ T604,
+ T622,
+ T624,
+ T628,
+ T658,
+ T678,
+ T720,
+ T760,
+ T820,
+ T830,
+ T860,
+ T880,
+ G31,
+ G51,
+ G71,
+ G52,
+ G72,
+ G76, // Bifrost gen 3 (MaliInfo::IsBifrostGen3)
+ G57, // Valhall (MaliInfo::IsValhall)
+ G77, // Valhall (MaliInfo::IsValhall)
+ UNKNOWN
+};
+
+struct MaliInfo {
+  MaliInfo() = default;
+  explicit MaliInfo(const std::string& device_name);
+  MaliGPU gpu_version = MaliGPU::UNKNOWN;  // UNKNOWN unless a ctor sets it
+
+  bool IsMaliT6xx() const;
+  bool IsMaliT7xx() const;
+  bool IsMaliT8xx() const;
+  bool IsMidgard() const;
+  bool IsBifrostGen1() const;
+  bool IsBifrostGen2() const;
+  bool IsBifrostGen3() const;  // gpu_version == G76
+  bool IsBifrost() const;      // any of the three Bifrost generations
+  bool IsValhall() const;      // gpu_version is G57 or G77
+};
+
+struct DeviceInfo {
+  DeviceInfo() = default;  // scalar members below have in-class defaults
+
+  bool IsAdreno() const;
+  bool IsAdreno3xx() const;
+  bool IsAdreno4xx() const;
+  bool IsAdreno5xx() const;
+  bool IsAdreno6xx() const;
+  bool IsAdreno6xxOrHigher() const;
+  bool IsPowerVR() const;
+  bool IsNvidia() const;
+  bool IsMali() const;
+  bool IsAMD() const;
+  bool IsIntel() const;
+
+  bool SupportsTextureArray() const;
+  bool SupportsImageBuffer() const;
+  bool SupportsImage3D() const;
+
+  std::vector<std::string> extensions;
+  bool supports_fp16 = false;
+  bool supports_image3d_writes = false;
+  Vendor vendor = Vendor::kUnknown;  // no vendor predicate matches by default
+  OpenCLVersion cl_version = OpenCLVersion::CL_1_0;  // lowest version
+  int compute_units_count = 0;
+  uint64_t buffer_max_size = 0;
+  uint64_t image2d_max_width = 0;
+  uint64_t image2d_max_height = 0;
+  uint64_t image_buffer_max_size = 0;
+  uint64_t image_array_max_layers = 0;
+  uint64_t image3d_max_width = 0;
+  uint64_t image3d_max_height = 0;
+  uint64_t image3d_max_depth = 0;
+  int max_work_group_size_x = 0;
+  int max_work_group_size_y = 0;
+  int max_work_group_size_z = 0;
+
+  // rtn is ROUND_TO_NEAREST
+  // with rtn precision is much better than with rtz (ROUND_TO_ZERO)
+  // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
+  // Mali from T6xx supports rtn
+  // PowerVR supports only rtz
+  bool supports_fp32_rtn = false;
+  bool supports_fp16_rtn = false;
+
+  AdrenoInfo adreno_info;
+  MaliInfo mali_info;
+};
+
+} // namespace cl
+} // namespace gpu
+} // namespace tflite
+
+#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
index b93fe11..ed1ec8b 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
@@ -37,7 +37,7 @@
}
int GetOptimalMaxConstantSize(const DeviceInfo& info) {
- if (info.vendor != Vendor::QUALCOMM) {
+ if (!info.IsAdreno()) {
// In general we do not expect that this kernel will be used with non Adreno
// so as it tuned for __constant memory that have big profit on Adreno
return 1024; // 1KB
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
index 5e280d5..3771a5b 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
@@ -80,9 +80,12 @@
const int3& grid,
int3* best_work_group) {
std::vector<int3> work_groups;
+ int3 max_wg_size;
+ max_wg_size.x = params.info->max_work_group_size_x;
+ max_wg_size.y = params.info->max_work_group_size_y;
+ max_wg_size.z = params.info->max_work_group_size_z;
RETURN_IF_ERROR(GenerateWorkGroupSizesAlignedToGrid(
- grid, params.info->max_work_group_sizes, kernel.GetMaxWorkGroupSize(),
- &work_groups));
+ grid, max_wg_size, kernel.GetMaxWorkGroupSize(), &work_groups));
int best_work_group_index;
RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
kernel, *params.info, grid, work_groups, &best_work_group_index));
@@ -268,10 +271,10 @@
switch (params.tuning_type) {
case TuningType::FAST: {
int max_z_size = 16;
- if (params.info->vendor == Vendor::QUALCOMM) {
+ if (params.info->IsAdreno()) {
max_z_size = params.info->adreno_info.gpu_version < 400 ? 16 : 64;
}
- max_z_size = std::min(max_z_size, params.info->max_work_group_sizes.z);
+ max_z_size = std::min(max_z_size, params.info->max_work_group_size_z);
*best_work_group =
GetWorkGroupConv(grid, kernel.GetMaxWorkGroupSize(), max_z_size);
return absl::OkStatus();
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
index 3e2531c..b577757 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
@@ -167,22 +167,21 @@
const CreationContext& creation_context,
const OperationDef& op_def, ModelHints hints,
std::unique_ptr<GPUOperation>* ptr) {
- switch (creation_context.device->vendor()) {
- case Vendor::QUALCOMM:
- return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
+ const auto& device_info = creation_context.device->GetInfo();
+ if (device_info.IsAdreno()) {
+ return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
hints, ptr);
- case Vendor::POWERVR:
- case Vendor::INTEL:
- case Vendor::AMD:
- return SelectConvolutionPowerVR(attr, creation_context, op_def, ptr);
- case Vendor::NVIDIA:
- return SelectConvolutionNVidia(attr, dst_shape, creation_context, op_def,
+ } else if (device_info.IsPowerVR() || device_info.IsAMD() ||
+ device_info.IsIntel()) {
+ return SelectConvolutionPowerVR(attr, creation_context, op_def, ptr);
+ } else if (device_info.IsNvidia()) {
+ return SelectConvolutionNVidia(attr, dst_shape, creation_context, op_def,
ptr);
- case Vendor::MALI:
- return SelectConvolutionMali(attr, dst_shape, creation_context, op_def,
+ } else if (device_info.IsMali()) {
+ return SelectConvolutionMali(attr, dst_shape, creation_context, op_def,
ptr);
- default:
- return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
+ } else {
+ return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
hints, ptr);
}
}
@@ -191,25 +190,22 @@
const Convolution2DAttributes& attr, const BHWC& dst_shape,
const CreationContext& creation_context, const OperationDef& op_def,
ModelHints hints, std::unique_ptr<GPUOperation>* ptr) {
- switch (creation_context.device->vendor()) {
- case Vendor::QUALCOMM:
- return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
+ const auto& device_info = creation_context.device->GetInfo();
+ if (device_info.IsAdreno()) {
+ return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
op_def, hints, ptr);
- case Vendor::POWERVR:
- case Vendor::AMD:
- case Vendor::INTEL:
- case Vendor::NVIDIA: {
- ConvPowerVR conv;
+ } else if (device_info.IsPowerVR() || device_info.IsAMD() ||
+ device_info.IsNvidia() || device_info.IsIntel()) {
+ ConvPowerVR conv;
RETURN_IF_ERROR(CreateConvPowerVRWino4x4To6x6(creation_context, op_def,
attr, &conv, &dst_shape));
*ptr = absl::make_unique<ConvPowerVR>(std::move(conv));
return absl::OkStatus();
- }
- case Vendor::MALI:
- return SelectConvolutionWinogradMali(attr, dst_shape, creation_context,
+ } else if (device_info.IsMali()) {
+ return SelectConvolutionWinogradMali(attr, dst_shape, creation_context,
op_def, ptr);
- default:
- return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
+ } else {
+ return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
op_def, hints, ptr);
}
}
@@ -219,23 +215,22 @@
const BHWC& dst_shape, const CreationContext& creation_context,
const OperationDef& op_def, ModelHints hints,
std::unique_ptr<GPUOperation>* ptr, ConvWeightsDescription* weights_desc) {
- switch (creation_context.device->vendor()) {
- case Vendor::QUALCOMM:
- return SelectConvolutionDynamicWeightsAdreno(
- attr, weights_shape, dst_shape, creation_context, op_def, hints, ptr,
- weights_desc);
- case Vendor::MALI:
- return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape,
+ const auto& device_info = creation_context.device->GetInfo();
+ if (device_info.IsAdreno()) {
+ return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape,
creation_context, op_def,
hints, ptr, weights_desc);
- default: {
- ConvPowerVR conv;
- RETURN_IF_ERROR(CreateConvPowerVRDynamicWeights(
- creation_context, op_def, attr, weights_shape, &conv, &dst_shape));
- *weights_desc = conv.GetConvWeightsDescription();
- *ptr = absl::make_unique<ConvPowerVR>(std::move(conv));
- return absl::OkStatus();
- }
+ } else if (device_info.IsMali()) {
+ return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape,
+ creation_context, op_def, hints,
+ ptr, weights_desc);
+ } else {
+ ConvPowerVR conv;
+ RETURN_IF_ERROR(CreateConvPowerVRDynamicWeights(
+ creation_context, op_def, attr, weights_shape, &conv, &dst_shape));
+ *weights_desc = conv.GetConvWeightsDescription();
+ *ptr = absl::make_unique<ConvPowerVR>(std::move(conv));
+ return absl::OkStatus();
}
}
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc
index 5fdfdca..56864f2 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc
@@ -105,22 +105,19 @@
const ConvolutionTransposedAttributes& attr,
const CreationContext& creation_context, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
- switch (creation_context.device->vendor()) {
- case Vendor::QUALCOMM:
- return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
- ptr);
- case Vendor::POWERVR:
- case Vendor::NVIDIA:
- case Vendor::AMD:
- case Vendor::INTEL:
- return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def,
- ptr);
- case Vendor::MALI:
- return SelectConvolutionTransposedMali(attr, creation_context, op_def,
+ const auto& device_info = creation_context.device->GetInfo();
+ if (device_info.IsAdreno()) {
+ return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
ptr);
- default:
- return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
- ptr);
+ } else if (device_info.IsPowerVR() || device_info.IsAMD() ||
+ device_info.IsNvidia() || device_info.IsIntel()) {
+ return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def,
+ ptr);
+ } else if (device_info.IsMali()) {
+ return SelectConvolutionTransposedMali(attr, creation_context, op_def, ptr);
+ } else {
+ return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
+ ptr);
}
}
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc
index 54ff45d..fafd907 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc
@@ -90,15 +90,15 @@
const CreationContext& creation_context,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
- switch (creation_context.device->vendor()) {
- case Vendor::QUALCOMM:
- return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
- case Vendor::POWERVR:
- return SelectDWConvolutionPowerVR(attr, creation_context, op_def, ptr);
- case Vendor::MALI:
- return SelectDWConvolutionMali(attr, creation_context, op_def, ptr);
- default:
- return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
+ const auto& device_info = creation_context.device->GetInfo();
+ if (device_info.IsAdreno()) {
+ return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
+ } else if (device_info.IsPowerVR()) {
+ return SelectDWConvolutionPowerVR(attr, creation_context, op_def, ptr);
+ } else if (device_info.IsMali()) {
+ return SelectDWConvolutionMali(attr, creation_context, op_def, ptr);
+ } else {
+ return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
}
}
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc
index eacbea8..cb967e4 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc
@@ -104,22 +104,20 @@
const CreationContext& creation_context,
const OperationDef& op_def, int batch_size,
std::unique_ptr<GPUOperation>* ptr) {
- switch (creation_context.device->vendor()) {
- case Vendor::QUALCOMM:
- return SelectFullyConnectedAdreno(attr, creation_context, op_def,
- batch_size, ptr);
- case Vendor::POWERVR:
- case Vendor::AMD:
- case Vendor::NVIDIA:
- case Vendor::INTEL:
- return SelectFullyConnectedPowerVR(attr, creation_context, op_def,
- batch_size, ptr);
- case Vendor::MALI:
- return SelectFullyConnectedMali(attr, creation_context, op_def,
+ const auto& device_info = creation_context.device->GetInfo();
+ if (device_info.IsAdreno()) {
+ return SelectFullyConnectedAdreno(attr, creation_context, op_def,
batch_size, ptr);
- default:
- return SelectFullyConnectedGeneric(attr, creation_context, op_def,
- batch_size, ptr);
+ } else if (device_info.IsPowerVR() || device_info.IsAMD() ||
+ device_info.IsNvidia() || device_info.IsIntel()) {
+ return SelectFullyConnectedPowerVR(attr, creation_context, op_def,
+ batch_size, ptr);
+ } else if (device_info.IsMali()) {
+ return SelectFullyConnectedMali(attr, creation_context, op_def, batch_size,
+ ptr);
+ } else {
+ return SelectFullyConnectedGeneric(attr, creation_context, op_def,
+ batch_size, ptr);
}
}