blob: 6c0c549adcfa33a58c3490e1cfecfb99575d35fe [file] [log] [blame]
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
#include <cmath>
#include <vector>
#include "absl/strings/str_cat.h"
#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
namespace tflite {
namespace gpu {
namespace cl {
namespace {
std::string GetReadImageFromDataType(DataType data_type) {
if (data_type == DataType::FLOAT32) {
return "read_imagef";
} else if (data_type == DataType::FLOAT16) {
return "read_imageh";
} else {
return "error";
}
}
std::string GetWriteImageFromDataType(DataType data_type) {
if (data_type == DataType::FLOAT32) {
return "write_imagef";
} else if (data_type == DataType::FLOAT16) {
return "write_imageh";
} else {
return "error";
}
}
std::string GetImageModifier(AccessType access) {
switch (access) {
case AccessType::READ:
return "__read_only";
case AccessType::WRITE:
return "__write_only";
case AccessType::READ_WRITE:
return "__read_write";
}
}
std::string TextureAddressModeToString(TextureAddressMode address_mode) {
switch (address_mode) {
case TextureAddressMode::DONT_CARE:
return "smp_none";
case TextureAddressMode::ZERO:
return "smp_zero";
}
}
} // namespace
std::string GetCommonDefines(CalculationsPrecision precision) {
std::string result;
switch (precision) {
case CalculationsPrecision::F32:
result += "#define ACCUM_FLT4 float4\n";
result += "#define FLT float\n";
result += "#define FLT2 float2\n";
result += "#define FLT3 float3\n";
result += "#define FLT4 float4\n";
result += "#define TO_FLT4 convert_float4\n";
result += "#define TO_ACCUM_TYPE convert_float4\n";
result += "#define TO_ACCUM_FLT convert_float\n";
result += "#define READ_IMAGE read_imagef\n";
result += "#define WRITE_IMAGE write_imagef\n";
break;
case CalculationsPrecision::F16:
result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
result += "#define ACCUM_FLT4 half4\n";
result += "#define FLT half\n";
result += "#define FLT2 half2\n";
result += "#define FLT3 half3\n";
result += "#define FLT4 half4\n";
result += "#define TO_FLT4 convert_half4\n";
result += "#define TO_ACCUM_TYPE convert_half4\n";
result += "#define TO_ACCUM_FLT convert_half\n";
result += "#define READ_IMAGE read_imageh\n";
result += "#define WRITE_IMAGE write_imageh\n";
break;
case CalculationsPrecision::F32_F16:
result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
result += "#define ACCUM_FLT4 float4\n";
result += "#define FLT half\n";
result += "#define FLT2 half2\n";
result += "#define FLT3 half3\n";
result += "#define FLT4 half4\n";
result += "#define TO_FLT4 convert_half4\n";
result += "#define TO_ACCUM_TYPE convert_float4\n";
result += "#define TO_ACCUM_FLT convert_float\n";
result += "#define READ_IMAGE read_imageh\n";
result += "#define WRITE_IMAGE write_imageh\n";
break;
}
result +=
"const sampler_t smp_edge = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n";
result +=
"const sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
result +=
"const sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
return result;
}
TensorCodeGenerator::TensorCodeGenerator(const std::string& name,
const std::string& uniform_size_name,
const TensorDescriptor& descriptor)
: tensor_name_(name),
uniform_size_name_(uniform_size_name),
descriptor_(descriptor) {}
std::string TensorCodeGenerator::GetDeclaration(AccessType access_type) const {
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
return absl::StrCat("__global ", ToCLDataType(descriptor_.data_type, 4),
"* ", tensor_name_);
case TensorStorageType::TEXTURE_2D:
case TensorStorageType::SINGLE_TEXTURE_2D:
return GetImageModifier(access_type) + " image2d_t " + tensor_name_;
case TensorStorageType::TEXTURE_ARRAY:
return GetImageModifier(access_type) + " image2d_array_t " + tensor_name_;
case TensorStorageType::IMAGE_BUFFER:
if (access_type == AccessType::WRITE) {
return absl::StrCat("__global ", ToCLDataType(descriptor_.data_type, 4),
"* ", tensor_name_);
} else {
return GetImageModifier(access_type) + " image1d_buffer_t " +
tensor_name_;
}
case TensorStorageType::UNKNOWN:
return "error";
}
}
std::string TensorCodeGenerator::Read3D(const std::string& x,
const std::string& y,
const std::string& z,
TextureAddressMode address_mode) const {
return Read(GetGlobalAddressNoDeclaration(x, y, z), address_mode);
}
std::string TensorCodeGenerator::Read4D(const std::string& x,
const std::string& y,
const std::string& z,
const std::string& b,
TextureAddressMode address_mode) const {
return Read(GetGlobalAddressNoDeclaration(x, y, z, b), address_mode);
}
std::string TensorCodeGenerator::ReadAsFloat3D(
const std::string& x, const std::string& y, const std::string& z,
TextureAddressMode address_mode) const {
return ReadAsFloat(GetGlobalAddressNoDeclaration(x, y, z), address_mode);
}
std::string TensorCodeGenerator::ReadAsFloat4D(
const std::string& x, const std::string& y, const std::string& z,
const std::string& b, TextureAddressMode address_mode) const {
return ReadAsFloat(GetGlobalAddressNoDeclaration(x, y, z, b), address_mode);
}
std::string TensorCodeGenerator::GetAddress(const std::string& var_name,
const std::string& x,
const std::string& y,
const std::string& z) const {
return DeclareAddress(var_name, GetGlobalAddressNoDeclaration(x, y, z));
}
std::string TensorCodeGenerator::GetAddress(const std::string& var_name,
const std::string& x,
const std::string& y,
const std::string& z,
const std::string& b) const {
return DeclareAddress(var_name, GetGlobalAddressNoDeclaration(x, y, z, b));
}
std::string TensorCodeGenerator::GetGlobalAddressNoDeclaration(
const std::string& x, const std::string& y, const std::string& z) const {
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
case TensorStorageType::IMAGE_BUFFER:
return absl::Substitute("((($2) * $3.y + ($1)) * $3.x + ($0))", x, y, z,
uniform_size_name_);
case TensorStorageType::TEXTURE_2D:
return absl::Substitute("(int2)(($0), ($1) * $3.w + ($2))", x, y, z,
uniform_size_name_);
case TensorStorageType::SINGLE_TEXTURE_2D:
return absl::StrCat("(int2)(", x, ", ", y, ")");
case TensorStorageType::TEXTURE_ARRAY:
return absl::StrCat("(int4)(", x, ", ", y, ", ", z, ", 0)");
case TensorStorageType::UNKNOWN:
return "error";
}
}
std::string TensorCodeGenerator::GetGlobalAddressNoDeclaration(
const std::string& x, const std::string& y, const std::string& z,
const std::string& b) const {
if (b.empty()) {
return GetGlobalAddressNoDeclaration(x, y, z);
}
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
case TensorStorageType::IMAGE_BUFFER:
return absl::Substitute(
"(((($3) * $4.w + $2) * $4.y + ($1)) * $4.x + ($0))", x, y, z, b,
uniform_size_name_);
default:
return "error";
}
}
std::string TensorCodeGenerator::DeclareAddress(
const std::string& var_name, const std::string& address) const {
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
case TensorStorageType::IMAGE_BUFFER:
return absl::StrCat("int ", var_name, " = ", address, ";\n");
case TensorStorageType::TEXTURE_2D:
case TensorStorageType::SINGLE_TEXTURE_2D:
return absl::StrCat("int2 ", var_name, " = ", address, ";\n");
case TensorStorageType::TEXTURE_ARRAY:
return absl::StrCat("int4 ", var_name, " = ", address, ";\n");
case TensorStorageType::UNKNOWN:
return "";
}
}
std::string TensorCodeGenerator::Write3D(const std::string& var_name,
const std::string& x,
const std::string& y,
const std::string& z) const {
return Write(var_name, GetGlobalAddressNoDeclaration(x, y, z));
}
std::string TensorCodeGenerator::Write4D(const std::string& var_name,
const std::string& x,
const std::string& y,
const std::string& z,
const std::string& b) const {
return Write(var_name, GetGlobalAddressNoDeclaration(x, y, z, b));
}
std::string TensorCodeGenerator::Read(const std::string& global_address,
TextureAddressMode address_mode) const {
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
return absl::StrCat(tensor_name_, "[", global_address, "]");
case TensorStorageType::TEXTURE_2D:
case TensorStorageType::SINGLE_TEXTURE_2D:
case TensorStorageType::TEXTURE_ARRAY:
return absl::StrCat(
GetReadImageFromDataType(descriptor_.data_type), "(", tensor_name_,
", " + TextureAddressModeToString(address_mode) + ", ",
global_address, ")");
case TensorStorageType::IMAGE_BUFFER:
return absl::StrCat(GetReadImageFromDataType(descriptor_.data_type), "(",
tensor_name_, ", ", global_address, ")");
case TensorStorageType::UNKNOWN:
return "";
}
}
std::string TensorCodeGenerator::ReadAsFloat(
const std::string& global_address, TextureAddressMode address_mode) const {
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
return absl::StrCat("convert_float4(", tensor_name_, "[", global_address,
"])");
case TensorStorageType::TEXTURE_2D:
case TensorStorageType::SINGLE_TEXTURE_2D:
case TensorStorageType::TEXTURE_ARRAY:
return absl::StrCat(
"read_imagef(", tensor_name_,
", " + TextureAddressModeToString(address_mode) + ", ",
global_address, ")");
case TensorStorageType::IMAGE_BUFFER:
return absl::StrCat("read_imagef(", tensor_name_, ", ", global_address,
")");
case TensorStorageType::UNKNOWN:
return "";
}
}
std::string TensorCodeGenerator::Write(
const std::string& var_name, const std::string& global_address) const {
switch (descriptor_.storage_type) {
case TensorStorageType::BUFFER:
case TensorStorageType::IMAGE_BUFFER:
return absl::StrCat(tensor_name_, "[", global_address, "] = ", var_name,
";\n");
case TensorStorageType::TEXTURE_2D:
case TensorStorageType::SINGLE_TEXTURE_2D:
case TensorStorageType::TEXTURE_ARRAY:
return absl::StrCat(GetWriteImageFromDataType(descriptor_.data_type), "(",
tensor_name_, ", ", global_address, ", ", var_name,
");\n");
case TensorStorageType::UNKNOWN:
return "";
}
}
TextureAddressMode GetFastestZeroMode(const CLDevice& device) {
return device.IsAdreno3xx() ? TextureAddressMode::DONT_CARE
: TextureAddressMode::ZERO;
}
float4 GetMaskForLastPlane(int channels) {
float4 mask = float4(0.0f);
const int reminder = channels % 4 == 0 ? 4 : channels % 4;
for (int i = 0; i < reminder; ++i) {
mask[i] = 1.0f;
}
return mask;
}
} // namespace cl
} // namespace gpu
} // namespace tflite