tensorflow/lite/delegates/gpu/cl/tensor.h - platform/external/tensorflow - Git at Google

 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_

 #include <cstdint>
 #include <memory>

 #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_memory.h"
 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"

 namespace tflite {
 namespace gpu {
 namespace cl {

 class Tensor : public GPUObject, public GpuSpatialTensor {
  public:
   Tensor()
       : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
   Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
          const TensorDescriptor& descriptor);
   Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
          const TensorDescriptor& descriptor);
   Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
          const BHWC& shape, const TensorDescriptor& descriptor);
   Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
          const BHWDC& shape, const TensorDescriptor& descriptor);

   // Move only
   Tensor(Tensor&& tensor);
   Tensor& operator=(Tensor&& tensor);
   Tensor(const Tensor&) = delete;
   Tensor& operator=(const Tensor&) = delete;

   ~Tensor() override { Release(); }

   absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                                GPUResourcesWithValue* resources) const override;

   int Width() const override { return shape_.w; }
   int Height() const override { return shape_.h; }
   int Depth() const override { return shape_.d; }
   int Channels() const override { return shape_.c; }
   int Slices() const override { return DivideRoundUp(shape_.c, 4); }
   int Batch() const override { return shape_.b; }

   TensorDescriptor GetDescriptor() const override { return descriptor_; }
   DataType GetDataType() const { return descriptor_.GetDataType(); }
   TensorStorageType GetStorageType() const {
     return descriptor_.GetStorageType();
   }

   uint64_t GetMemorySizeInBytes() const;

   cl_mem GetMemoryPtr() const;

   // This function returns buffer memory ptr for IMAGE_BUFFER instead of image
   // memory ptr.
   cl_mem GetMemoryPtrForWriting() const;

   absl::Status WriteData(
       CLCommandQueue* queue,
       const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
   absl::Status WriteData(
       CLCommandQueue* queue,
       const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
   template <DataType T>
   absl::Status WriteData(CLCommandQueue* queue,
                          const tflite::gpu::Tensor<BHWC, T>& src);
   template <DataType T>
   absl::Status WriteData(CLCommandQueue* queue,
                          const tflite::gpu::Tensor<BHWDC, T>& src);
   template <DataType T>
   absl::Status ReadData(CLCommandQueue* queue,
                         tflite::gpu::Tensor<BHWC, T>* dst) const;
   template <DataType T>
   absl::Status ReadData(CLCommandQueue* queue,
                         tflite::gpu::Tensor<BHWDC, T>* dst) const;

   absl::Status CreateFromDescriptor(const TensorDescriptor& desc,
                                     CLContext* context);
   absl::Status ToDescriptor(TensorDescriptor* desc,
                             CLCommandQueue* queue) const;

  private:
   friend absl::Status CreateTensorSharedImage2DBuffer(
       const CLContext& context, cl_mem memory,
       const TensorDescriptor& descriptor, int width_pixel_alignment,
       Tensor* result);
   absl::Status IsValid(const BHWC& shape) const;
   absl::Status IsValid(const BHWDC& shape) const;

   int GetChannelsAlignment() const;
   int GetAlignedChannels() const;

   template <typename T>
   absl::Status WriteDataBHWDC(const T* in, CLCommandQueue* queue);
   absl::Status WriteData(const void* ptr, CLCommandQueue* queue);
   template <typename T>
   absl::Status ReadDataBHWDC(T* out, CLCommandQueue* queue) const;
   absl::Status ReadData(void* ptr, CLCommandQueue* queue) const;

   int3 GetFullTensorRegion() const;
   void Release();

   cl_mem memory_;
   cl_mem image_buffer_memory_;  // for IMAGE_BUFFER/TEXTURE_2D/SINGLE_TEXTURE_2D
   bool memory_owner_;
   bool buffer_based_ = false;
   BHWDC shape_;
   TensorDescriptor descriptor_;
   // for use with TEXTURE_2D and when texture created from buffer.
   int aligned_texture_width_;
 };

 using TensorPtr = std::shared_ptr<Tensor>;

 absl::Status AllocateTensorMemory(const CLContext& context,
                                   const TensorDescriptor& descriptor,
                                   CLMemory* result);

 absl::Status CreateTensor(const CLContext& context,
                           const TensorDescriptor& descriptor, Tensor* result);

 absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                 const BHWC& shape,
                                 const TensorDescriptor& descriptor,
                                 Tensor* result);

 absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                 const BHWDC& shape,
                                 const TensorDescriptor& descriptor,
                                 Tensor* result);

 absl::Status CreateTensorSharedImage2DBuffer(const CLContext& context,
                                              cl_mem memory,
                                              const TensorDescriptor& descriptor,
                                              int width_pixel_alignment,
                                              Tensor* result);

 template <DataType T>
 absl::Status Tensor::WriteData(CLCommandQueue* queue,
                                const tflite::gpu::Tensor<BHWC, T>& src) {
   RETURN_IF_ERROR(IsValid(src.shape));
   return WriteDataBHWDC(src.data.data(), queue);
 }

 template <DataType T>
 absl::Status Tensor::WriteData(CLCommandQueue* queue,
                                const tflite::gpu::Tensor<BHWDC, T>& src) {
   RETURN_IF_ERROR(IsValid(src.shape));
   return WriteDataBHWDC(src.data.data(), queue);
 }

 template <DataType T>
 absl::Status Tensor::ReadData(CLCommandQueue* queue,
                               tflite::gpu::Tensor<BHWC, T>* dst) const {
   RETURN_IF_ERROR(IsValid(dst->shape));
   return ReadDataBHWDC(dst->data.data(), queue);
 }

 template <DataType T>
 absl::Status Tensor::ReadData(CLCommandQueue* queue,
                               tflite::gpu::Tensor<BHWDC, T>* dst) const {
   RETURN_IF_ERROR(IsValid(dst->shape));
   return ReadDataBHWDC(dst->data.data(), queue);
 }

 template <typename T>
 absl::Status Tensor::WriteDataBHWDC(const T* in, CLCommandQueue* queue) {
   std::unique_ptr<uint8_t[]> data_copy;
   data_copy.reset(new uint8_t[GetMemorySizeInBytes()]);
   if (descriptor_.GetDataType() == DataType::FLOAT16) {
     // rearrangement and conversion from float32 to float16
     DataFromBHWDC(reinterpret_cast<const float*>(in), shape_, descriptor_,
                   reinterpret_cast<half*>(data_copy.get()));
   } else {
     // rearrangement
     DataFromBHWDC(in, shape_, descriptor_,
                   reinterpret_cast<T*>(data_copy.get()));
   }

   return WriteData(data_copy.get(), queue);
 }

 template <typename T>
 absl::Status Tensor::ReadDataBHWDC(T* out, CLCommandQueue* queue) const {
   std::unique_ptr<uint8_t[]> data_copy;
   data_copy.reset(new uint8_t[GetMemorySizeInBytes()]);

   RETURN_IF_ERROR(ReadData(data_copy.get(), queue));

   if (descriptor_.GetDataType() == DataType::FLOAT16) {
     // rearrangement and conversion from float32 to float16
     DataToBHWDC(reinterpret_cast<half*>(data_copy.get()), shape_, descriptor_,
                 reinterpret_cast<float*>(out));
   } else {
     // rearrangement
     DataToBHWDC(reinterpret_cast<T*>(data_copy.get()), shape_, descriptor_,
                 out);
   }

   return absl::OkStatus();
 }

 }  // namespace cl
 }  // namespace gpu
 }  // namespace tflite

 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
	/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
	#define TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_

	#include <cstdint>
	#include <memory>

	#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
	#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
	#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
	#include "tensorflow/lite/delegates/gpu/cl/cl_memory.h"
	#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
	#include "tensorflow/lite/delegates/gpu/cl/util.h"
	#include "tensorflow/lite/delegates/gpu/common/data_type.h"
	#include "tensorflow/lite/delegates/gpu/common/shape.h"
	#include "tensorflow/lite/delegates/gpu/common/status.h"
	#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
	#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
	#include "tensorflow/lite/delegates/gpu/common/tensor.h"
	#include "tensorflow/lite/delegates/gpu/common/types.h"

	namespace tflite {
	namespace gpu {
	namespace cl {

	class Tensor : public GPUObject, public GpuSpatialTensor {
	public:
	Tensor()
	: memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
	Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
	const TensorDescriptor& descriptor);
	Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
	const TensorDescriptor& descriptor);
	Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
	const BHWC& shape, const TensorDescriptor& descriptor);
	Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
	const BHWDC& shape, const TensorDescriptor& descriptor);

	// Move only
	Tensor(Tensor&& tensor);
	Tensor& operator=(Tensor&& tensor);
	Tensor(const Tensor&) = delete;
	Tensor& operator=(const Tensor&) = delete;

	~Tensor() override { Release(); }

	absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
	GPUResourcesWithValue* resources) const override;

	int Width() const override { return shape_.w; }
	int Height() const override { return shape_.h; }
	int Depth() const override { return shape_.d; }
	int Channels() const override { return shape_.c; }
	int Slices() const override { return DivideRoundUp(shape_.c, 4); }
	int Batch() const override { return shape_.b; }

	TensorDescriptor GetDescriptor() const override { return descriptor_; }
	DataType GetDataType() const { return descriptor_.GetDataType(); }
	TensorStorageType GetStorageType() const {
	return descriptor_.GetStorageType();
	}

	uint64_t GetMemorySizeInBytes() const;

	cl_mem GetMemoryPtr() const;

	// This function returns buffer memory ptr for IMAGE_BUFFER instead of image
	// memory ptr.
	cl_mem GetMemoryPtrForWriting() const;

	absl::Status WriteData(
	CLCommandQueue* queue,
	const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
	absl::Status WriteData(
	CLCommandQueue* queue,
	const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
	template <DataType T>
	absl::Status WriteData(CLCommandQueue* queue,
	const tflite::gpu::Tensor<BHWC, T>& src);
	template <DataType T>
	absl::Status WriteData(CLCommandQueue* queue,
	const tflite::gpu::Tensor<BHWDC, T>& src);
	template <DataType T>
	absl::Status ReadData(CLCommandQueue* queue,
	tflite::gpu::Tensor<BHWC, T>* dst) const;
	template <DataType T>
	absl::Status ReadData(CLCommandQueue* queue,
	tflite::gpu::Tensor<BHWDC, T>* dst) const;

	absl::Status CreateFromDescriptor(const TensorDescriptor& desc,
	CLContext* context);
	absl::Status ToDescriptor(TensorDescriptor* desc,
	CLCommandQueue* queue) const;

	private:
	friend absl::Status CreateTensorSharedImage2DBuffer(
	const CLContext& context, cl_mem memory,
	const TensorDescriptor& descriptor, int width_pixel_alignment,
	Tensor* result);
	absl::Status IsValid(const BHWC& shape) const;
	absl::Status IsValid(const BHWDC& shape) const;

	int GetChannelsAlignment() const;
	int GetAlignedChannels() const;

	template <typename T>
	absl::Status WriteDataBHWDC(const T* in, CLCommandQueue* queue);
	absl::Status WriteData(const void* ptr, CLCommandQueue* queue);
	template <typename T>
	absl::Status ReadDataBHWDC(T* out, CLCommandQueue* queue) const;
	absl::Status ReadData(void* ptr, CLCommandQueue* queue) const;

	int3 GetFullTensorRegion() const;
	void Release();

	cl_mem memory_;
	cl_mem image_buffer_memory_; // for IMAGE_BUFFER/TEXTURE_2D/SINGLE_TEXTURE_2D
	bool memory_owner_;
	bool buffer_based_ = false;
	BHWDC shape_;
	TensorDescriptor descriptor_;
	// for use with TEXTURE_2D and when texture created from buffer.
	int aligned_texture_width_;
	};

	using TensorPtr = std::shared_ptr<Tensor>;

	absl::Status AllocateTensorMemory(const CLContext& context,
	const TensorDescriptor& descriptor,
	CLMemory* result);

	absl::Status CreateTensor(const CLContext& context,
	const TensorDescriptor& descriptor, Tensor* result);

	absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
	const BHWC& shape,
	const TensorDescriptor& descriptor,
	Tensor* result);

	absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
	const BHWDC& shape,
	const TensorDescriptor& descriptor,
	Tensor* result);

	absl::Status CreateTensorSharedImage2DBuffer(const CLContext& context,
	cl_mem memory,
	const TensorDescriptor& descriptor,
	int width_pixel_alignment,
	Tensor* result);

	template <DataType T>
	absl::Status Tensor::WriteData(CLCommandQueue* queue,
	const tflite::gpu::Tensor<BHWC, T>& src) {
	RETURN_IF_ERROR(IsValid(src.shape));
	return WriteDataBHWDC(src.data.data(), queue);
	}

	template <DataType T>
	absl::Status Tensor::WriteData(CLCommandQueue* queue,
	const tflite::gpu::Tensor<BHWDC, T>& src) {
	RETURN_IF_ERROR(IsValid(src.shape));
	return WriteDataBHWDC(src.data.data(), queue);
	}

	template <DataType T>
	absl::Status Tensor::ReadData(CLCommandQueue* queue,
	tflite::gpu::Tensor<BHWC, T>* dst) const {
	RETURN_IF_ERROR(IsValid(dst->shape));
	return ReadDataBHWDC(dst->data.data(), queue);
	}

	template <DataType T>
	absl::Status Tensor::ReadData(CLCommandQueue* queue,
	tflite::gpu::Tensor<BHWDC, T>* dst) const {
	RETURN_IF_ERROR(IsValid(dst->shape));
	return ReadDataBHWDC(dst->data.data(), queue);
	}

	template <typename T>
	absl::Status Tensor::WriteDataBHWDC(const T* in, CLCommandQueue* queue) {
	std::unique_ptr<uint8_t[]> data_copy;
	data_copy.reset(new uint8_t[GetMemorySizeInBytes()]);
	if (descriptor_.GetDataType() == DataType::FLOAT16) {
	// rearrangement and conversion from float32 to float16
	DataFromBHWDC(reinterpret_cast<const float*>(in), shape_, descriptor_,
	reinterpret_cast<half*>(data_copy.get()));
	} else {
	// rearrangement
	DataFromBHWDC(in, shape_, descriptor_,
	reinterpret_cast<T*>(data_copy.get()));
	}

	return WriteData(data_copy.get(), queue);
	}

	template <typename T>
	absl::Status Tensor::ReadDataBHWDC(T* out, CLCommandQueue* queue) const {
	std::unique_ptr<uint8_t[]> data_copy;
	data_copy.reset(new uint8_t[GetMemorySizeInBytes()]);

	RETURN_IF_ERROR(ReadData(data_copy.get(), queue));

	if (descriptor_.GetDataType() == DataType::FLOAT16) {
	// rearrangement and conversion from float32 to float16
	DataToBHWDC(reinterpret_cast<half*>(data_copy.get()), shape_, descriptor_,
	reinterpret_cast<float*>(out));
	} else {
	// rearrangement
	DataToBHWDC(reinterpret_cast<T*>(data_copy.get()), shape_, descriptor_,
	out);
	}

	return absl::OkStatus();
	}

	} // namespace cl
	} // namespace gpu
	} // namespace tflite

	#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_