tensorflow/lite/delegates/gpu/cl/environment.cc - platform/external/tensorflow - Git at Google

 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 #include "tensorflow/lite/delegates/gpu/cl/environment.h"

 #include <string>
 #include <vector>

 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"

 namespace tflite {
 namespace gpu {
 namespace cl {
 namespace {
 std::string GetKernelOneLayerTextureArray() {
   return R"(

 __kernel void main_function(__write_only image2d_array_t dst) {
   int X = (int)(get_global_id(0));
   int Y = (int)(get_global_id(1));

   write_imagef(dst, (int4)(X, Y, 0, 0), (float4)(2.0, 2.0, 2.0, 2.0));
 }
 )";
 }

 // Some Adreno < 600 have bug with one layer texture array. b/131099086
 // If we have one layer texture array and will write smt from kernel to this
 // texture, we will get zeroes instead of actual values.
 // The same kernel will work, if we use texture array with more than one layer.
 // With help of this code we can detect this bug.
 Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
                                                 bool* result) {
   // No bug on Adreno 6xx
   if (env->device().GetInfo().adreno_info.gpu_version >= 600) {
     *result = true;
     return OkStatus();
   }
   CLKernel kernel;
   RETURN_IF_ERROR(env->program_cache()->GetOrCreateCLKernel(
       GetKernelOneLayerTextureArray(), "main_function", env->context(),
       env->device(), &kernel));

   Tensor tensor;
   const BHWC shape(1, 4, 4, 4);
   RETURN_IF_ERROR(CreateTensor(
       env->context(), env->device(), shape,
       {DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, Layout::HWC},
       &tensor));
   RETURN_IF_ERROR(kernel.SetMemory(0, tensor.GetMemoryPtr()));
   RETURN_IF_ERROR(env->queue()->DispatchImplicit(kernel, {4, 4, 1}, {4, 4, 1}));
   TensorFloat32 tensor_gpu;
   tensor_gpu.shape = shape;
   tensor_gpu.data.resize(shape.DimensionsProduct());
   RETURN_IF_ERROR(tensor.ReadData(env->queue(), &tensor_gpu));

   *result = true;
   for (int i = 0; i < 64; ++i) {
     if (tensor_gpu.data[i] != 2.0) {
       *result = false;
       break;
     }
   }
   return OkStatus();
 }

 Status CreateEnvironment(Environment* result, bool shared,
                          cl_context_properties egl_context,
                          cl_context_properties egl_display) {
   CLDevice gpu;
   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

   CLContext context;
   if (shared) {
     RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
   } else {
     RETURN_IF_ERROR(CreateCLContext(gpu, &context));
   }
   CLCommandQueue queue;
   RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
   ProfilingCommandQueue profiling_queue;
   RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

   *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                         std::move(profiling_queue));

   if (result->device().IsAdreno() && result->device().SupportsTextureArray()) {
     bool supports_one_layer;
     RETURN_IF_ERROR(
         CheckKernelSupportOfOneLayerTextureArray(result, &supports_one_layer));
     if (!supports_one_layer) {
       result->GetDevicePtr()->DisableOneLayerTextureArray();
     }
   }

   return OkStatus();
 }
 }  // namespace

 Environment::Environment(CLDevice&& device, CLContext&& context,
                          CLCommandQueue&& queue,
                          ProfilingCommandQueue&& profiling_queue)
     : device_(std::move(device)),
       context_(std::move(context)),
       queue_(std::move(queue)),
       profiling_queue_(std::move(profiling_queue)) {}

 Environment::Environment(Environment&& environment)
     : device_(std::move(environment.device_)),
       context_(std::move(environment.context_)),
       queue_(std::move(environment.queue_)),
       profiling_queue_(std::move(environment.profiling_queue_)),
       program_cache_(std::move(environment.program_cache_)) {}

 Environment& Environment::operator=(Environment&& environment) {
   if (this != &environment) {
     device_ = std::move(environment.device_);
     context_ = std::move(environment.context_);
     queue_ = std::move(environment.queue_);
     profiling_queue_ = std::move(environment.profiling_queue_);
     program_cache_ = std::move(environment.program_cache_);
   }
   return *this;
 }

 Status Environment::Init() {
   if (device().IsAdreno() && device().SupportsTextureArray()) {
     bool supports_one_layer;
     RETURN_IF_ERROR(
         CheckKernelSupportOfOneLayerTextureArray(this, &supports_one_layer));
     if (!supports_one_layer) {
       GetDevicePtr()->DisableOneLayerTextureArray();
     }
   }
   return OkStatus();
 }

 void Environment::SetHighPerformance() const {
   // TODO(sorokin) use cl_perf_hint if available
 }

 void Environment::SetDefaultPerformance() const {
   // TODO(sorokin) use cl_perf_hint if available
 }

 void Environment::SetLowPerformance() const {
   // TODO(sorokin) use cl_perf_hint if available
 }

 std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
   std::vector<CalculationsPrecision> precisions;
   for (CalculationsPrecision precision :
        {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
         CalculationsPrecision::F16}) {
     if (IsSupported(precision)) {
       precisions.push_back(precision);
     }
   }
   return precisions;
 }

 bool Environment::IsSupported(CalculationsPrecision precision) const {
   switch (precision) {
     case CalculationsPrecision::F32_F16:
     case CalculationsPrecision::F16:
       return device_.SupportsFP16();
     case CalculationsPrecision::F32:
       return true;
   }
 }

 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
   std::vector<TensorStorageType> storage_types;
   for (auto storage_type :
        {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
         TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
         TensorStorageType::TEXTURE_3D}) {
     if (IsSupported(storage_type)) {
       storage_types.push_back(storage_type);
     }
   }
   return storage_types;
 }

 bool Environment::IsSupported(TensorStorageType storage_type) const {
   switch (storage_type) {
     case TensorStorageType::TEXTURE_2D:
       return !device_.IsAMD();
     case TensorStorageType::BUFFER:
       return true;
     case TensorStorageType::TEXTURE_ARRAY:
       return !device_.IsAMD() && device_.SupportsTextureArray();
     case TensorStorageType::IMAGE_BUFFER:
       return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) &&
              device_.SupportsImageBuffer();
     case TensorStorageType::TEXTURE_3D:
       return !device_.IsAMD() && device_.SupportsImage3D();
     case TensorStorageType::SINGLE_TEXTURE_2D:
       return false;
     case TensorStorageType::UNKNOWN:
       return false;
   }
   return false;
 }

 TensorStorageType GetFastestStorageType(const CLDevice& gpu) {
   if (gpu.IsAdreno()) {
     if (gpu.IsAdreno6xxOrHigher()) {
       return TensorStorageType::TEXTURE_ARRAY;
     } else {
       return TensorStorageType::TEXTURE_2D;
     }
   } else if (gpu.IsPowerVR()) {
     return TensorStorageType::TEXTURE_2D;
   } else if (gpu.IsMali()) {
     return TensorStorageType::BUFFER;
   } else if (gpu.IsNvidia()) {
     return gpu.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                      : TensorStorageType::BUFFER;
   } else if (gpu.IsAMD()) {
     return gpu.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                      : TensorStorageType::BUFFER;
   }
   return TensorStorageType::BUFFER;
 }

 Status CreateEnvironment(Environment* result) {
   CLDevice gpu;
   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

   CLContext context;
   RETURN_IF_ERROR(CreateCLContext(gpu, &context));
   CLCommandQueue queue;
   RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
   ProfilingCommandQueue profiling_queue;
   RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

   *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                         std::move(profiling_queue));
   return result->Init();
 }

 }  // namespace cl
 }  // namespace gpu
 }  // namespace tflite
	/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	#include "tensorflow/lite/delegates/gpu/cl/environment.h"

	#include <string>
	#include <vector>

	#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
	#include "tensorflow/lite/delegates/gpu/cl/util.h"
	#include "tensorflow/lite/delegates/gpu/common/shape.h"

	namespace tflite {
	namespace gpu {
	namespace cl {
	namespace {
	std::string GetKernelOneLayerTextureArray() {
	return R"(

	__kernel void main_function(__write_only image2d_array_t dst) {
	int X = (int)(get_global_id(0));
	int Y = (int)(get_global_id(1));

	write_imagef(dst, (int4)(X, Y, 0, 0), (float4)(2.0, 2.0, 2.0, 2.0));
	}
	)";
	}

	// Some Adreno < 600 have bug with one layer texture array. b/131099086
	// If we have one layer texture array and will write smt from kernel to this
	// texture, we will get zeroes instead of actual values.
	// The same kernel will work, if we use texture array with more than one layer.
	// With help of this code we can detect this bug.
	Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
	bool* result) {
	// No bug on Adreno 6xx
	if (env->device().GetInfo().adreno_info.gpu_version >= 600) {
	*result = true;
	return OkStatus();
	}
	CLKernel kernel;
	RETURN_IF_ERROR(env->program_cache()->GetOrCreateCLKernel(
	GetKernelOneLayerTextureArray(), "main_function", env->context(),
	env->device(), &kernel));

	Tensor tensor;
	const BHWC shape(1, 4, 4, 4);
	RETURN_IF_ERROR(CreateTensor(
	env->context(), env->device(), shape,
	{DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, Layout::HWC},
	&tensor));
	RETURN_IF_ERROR(kernel.SetMemory(0, tensor.GetMemoryPtr()));
	RETURN_IF_ERROR(env->queue()->DispatchImplicit(kernel, {4, 4, 1}, {4, 4, 1}));
	TensorFloat32 tensor_gpu;
	tensor_gpu.shape = shape;
	tensor_gpu.data.resize(shape.DimensionsProduct());
	RETURN_IF_ERROR(tensor.ReadData(env->queue(), &tensor_gpu));

	*result = true;
	for (int i = 0; i < 64; ++i) {
	if (tensor_gpu.data[i] != 2.0) {
	*result = false;
	break;
	}
	}
	return OkStatus();
	}

	Status CreateEnvironment(Environment* result, bool shared,
	cl_context_properties egl_context,
	cl_context_properties egl_display) {
	CLDevice gpu;
	RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

	CLContext context;
	if (shared) {
	RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
	} else {
	RETURN_IF_ERROR(CreateCLContext(gpu, &context));
	}
	CLCommandQueue queue;
	RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
	ProfilingCommandQueue profiling_queue;
	RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

	*result = Environment(std::move(gpu), std::move(context), std::move(queue),
	std::move(profiling_queue));

	if (result->device().IsAdreno() && result->device().SupportsTextureArray()) {
	bool supports_one_layer;
	RETURN_IF_ERROR(
	CheckKernelSupportOfOneLayerTextureArray(result, &supports_one_layer));
	if (!supports_one_layer) {
	result->GetDevicePtr()->DisableOneLayerTextureArray();
	}
	}

	return OkStatus();
	}
	} // namespace

	Environment::Environment(CLDevice&& device, CLContext&& context,
	CLCommandQueue&& queue,
	ProfilingCommandQueue&& profiling_queue)
	: device_(std::move(device)),
	context_(std::move(context)),
	queue_(std::move(queue)),
	profiling_queue_(std::move(profiling_queue)) {}

	Environment::Environment(Environment&& environment)
	: device_(std::move(environment.device_)),
	context_(std::move(environment.context_)),
	queue_(std::move(environment.queue_)),
	profiling_queue_(std::move(environment.profiling_queue_)),
	program_cache_(std::move(environment.program_cache_)) {}

	Environment& Environment::operator=(Environment&& environment) {
	if (this != &environment) {
	device_ = std::move(environment.device_);
	context_ = std::move(environment.context_);
	queue_ = std::move(environment.queue_);
	profiling_queue_ = std::move(environment.profiling_queue_);
	program_cache_ = std::move(environment.program_cache_);
	}
	return *this;
	}

	Status Environment::Init() {
	if (device().IsAdreno() && device().SupportsTextureArray()) {
	bool supports_one_layer;
	RETURN_IF_ERROR(
	CheckKernelSupportOfOneLayerTextureArray(this, &supports_one_layer));
	if (!supports_one_layer) {
	GetDevicePtr()->DisableOneLayerTextureArray();
	}
	}
	return OkStatus();
	}

	void Environment::SetHighPerformance() const {
	// TODO(sorokin) use cl_perf_hint if available
	}

	void Environment::SetDefaultPerformance() const {
	// TODO(sorokin) use cl_perf_hint if available
	}

	void Environment::SetLowPerformance() const {
	// TODO(sorokin) use cl_perf_hint if available
	}

	std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
	std::vector<CalculationsPrecision> precisions;
	for (CalculationsPrecision precision :
	{CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
	CalculationsPrecision::F16}) {
	if (IsSupported(precision)) {
	precisions.push_back(precision);
	}
	}
	return precisions;
	}

	bool Environment::IsSupported(CalculationsPrecision precision) const {
	switch (precision) {
	case CalculationsPrecision::F32_F16:
	case CalculationsPrecision::F16:
	return device_.SupportsFP16();
	case CalculationsPrecision::F32:
	return true;
	}
	}

	std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
	std::vector<TensorStorageType> storage_types;
	for (auto storage_type :
	{TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
	TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
	TensorStorageType::TEXTURE_3D}) {
	if (IsSupported(storage_type)) {
	storage_types.push_back(storage_type);
	}
	}
	return storage_types;
	}

	bool Environment::IsSupported(TensorStorageType storage_type) const {
	switch (storage_type) {
	case TensorStorageType::TEXTURE_2D:
	return !device_.IsAMD();
	case TensorStorageType::BUFFER:
	return true;
	case TensorStorageType::TEXTURE_ARRAY:
	return !device_.IsAMD() && device_.SupportsTextureArray();
	case TensorStorageType::IMAGE_BUFFER:
	return (device_.IsAdreno() \|\| device_.IsAMD() \|\| device_.IsNvidia()) &&
	device_.SupportsImageBuffer();
	case TensorStorageType::TEXTURE_3D:
	return !device_.IsAMD() && device_.SupportsImage3D();
	case TensorStorageType::SINGLE_TEXTURE_2D:
	return false;
	case TensorStorageType::UNKNOWN:
	return false;
	}
	return false;
	}

	TensorStorageType GetFastestStorageType(const CLDevice& gpu) {
	if (gpu.IsAdreno()) {
	if (gpu.IsAdreno6xxOrHigher()) {
	return TensorStorageType::TEXTURE_ARRAY;
	} else {
	return TensorStorageType::TEXTURE_2D;
	}
	} else if (gpu.IsPowerVR()) {
	return TensorStorageType::TEXTURE_2D;
	} else if (gpu.IsMali()) {
	return TensorStorageType::BUFFER;
	} else if (gpu.IsNvidia()) {
	return gpu.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
	: TensorStorageType::BUFFER;
	} else if (gpu.IsAMD()) {
	return gpu.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
	: TensorStorageType::BUFFER;
	}
	return TensorStorageType::BUFFER;
	}

	Status CreateEnvironment(Environment* result) {
	CLDevice gpu;
	RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

	CLContext context;
	RETURN_IF_ERROR(CreateCLContext(gpu, &context));
	CLCommandQueue queue;
	RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
	ProfilingCommandQueue profiling_queue;
	RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

	*result = Environment(std::move(gpu), std::move(context), std::move(queue),
	std::move(profiling_queue));
	return result->Init();
	}

	} // namespace cl
	} // namespace gpu
	} // namespace tflite