/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_

#include <cstdint>
#include <memory>

#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_memory.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
namespace tflite {
namespace gpu {
namespace cl {

class Tensor {
 public:
  Tensor()
      : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
  Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
         const TensorDescriptor& descriptor);
  Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
         const BHWC& shape, const TensorDescriptor& descriptor);

  // Move only
  Tensor(Tensor&& tensor);
  Tensor& operator=(Tensor&& tensor);
  Tensor(const Tensor&) = delete;
  Tensor& operator=(const Tensor&) = delete;

  virtual ~Tensor() { Release(); }

  int Width() const { return shape_.w; }
  int Height() const { return shape_.h; }
  int Channels() const { return shape_.c; }
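  // Number of 4-channel slices: channels are packed four per element of the
  // underlying storage, so Depth() == ceil(Channels() / 4).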
  int Depth() const { return IntegralDivideRoundUp(shape_.c, 4); }
  int Batch() const { return shape_.b; }
  int4 GetSizeWithDepth() const {
    return int4(shape_.w, shape_.h, shape_.c, Depth());
  }
  // returns int4(width * batch, height, depth, batch)
  int4 GetWBatchedHDB() const {
    return int4(shape_.w * shape_.b, shape_.h, Depth(), shape_.b);
  }
  int4 GetWHDB() const { return int4(shape_.w, shape_.h, Depth(), shape_.b); }
  enum DataType DataType() const { return descriptor_.data_type; }
  TensorStorageType StorageType() const { return descriptor_.storage_type; }

  // for profiling and memory statistics
  uint64_t GetMemorySizeInBytes() const;

  cl_mem GetMemoryPtr() const;

  // This function returns buffer memory ptr for IMAGE_BUFFER instead of image
  // memory ptr.
  cl_mem GetMemoryPtrForWriting() const;
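  // Host <-> device copies. The BHWC data in |src|/|dst| is converted to/from
  // the tensor's internal storage layout (see DataFromBHWC/DataToBHWC below).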
  Status WriteData(CLCommandQueue* queue, const TensorFloat32& src);
  Status ReadData(CLCommandQueue* queue, TensorFloat32* dst) const;

 private:
  Status IsValid(const BHWC& shape) const;

  int GetChannelsAlignment() const;
  int GetAlignedChannels() const;

  Status WriteDataBHWC(absl::Span<const float> in, CLCommandQueue* queue);
  Status ReadDataBHWC(absl::Span<float> out, CLCommandQueue* queue) const;

  template <typename T>
  void DataFromBHWC(absl::Span<const float> src, absl::Span<T> dst) const;
  template <typename T>
  void DataToBHWC(absl::Span<const T> src, absl::Span<float> dst) const;

  // TODO(sorokin) might be bad performance
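  // Maps a (batch, x, y, depth-slice, channel-within-slice) coordinate to a
  // flat element index in the tensor's storage layout. For example, with a
  // BHWC(1, 4, 4, 16) shape and BUFFER storage (DHWBC4 layout), the element
  // at b=0, x=1, y=2, d=3, sub_d=0 lives at
  // (((3 * 4 + 2) * 4 + 1) * 1 + 0) * 4 + 0 = 228.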
  int GetLinearIndex(int b, int x, int y, int d, int sub_d) const {
    switch (descriptor_.storage_type) {
      case TensorStorageType::BUFFER:
      case TensorStorageType::IMAGE_BUFFER:
      case TensorStorageType::TEXTURE_ARRAY:
        return (((d * shape_.h + y) * shape_.w + x) * shape_.b + b) * 4 +
               sub_d;  // DHWBC4
      case TensorStorageType::TEXTURE_2D:
        return (((y * Depth() + d) * shape_.w + x) * shape_.b + b) * 4 +
               sub_d;  // HDWBC4
      case TensorStorageType::SINGLE_TEXTURE_2D:
        return ((y * shape_.w + x) * shape_.b + b) * shape_.c + sub_d;  // HWBC
      case TensorStorageType::UNKNOWN:
        return -1;
    }
  }
  int3 GetFullTensorRegion() const;
  void Release();

  cl_mem memory_;
  cl_mem image_buffer_memory_;  // for TensorStorageType::IMAGE_BUFFER only
  bool memory_owner_;
  BHWC shape_;
  TensorDescriptor descriptor_;
};

using TensorPtr = std::shared_ptr<Tensor>;
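// Helpers for creating tensors. CanCreateTensorWithShape reports whether a
// tensor with the given shape and descriptor can be allocated on the device;
// AllocateTensorMemory allocates the raw cl_mem; CreateTensor allocates and
// owns the memory; CreateSharedTensor wraps an externally owned cl_mem.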
bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWC& shape,
                              const TensorDescriptor& descriptor);

Status AllocateTensorMemory(const CLContext& context, const CLDevice& device,
                            const BHWC& shape,
                            const TensorDescriptor& descriptor,
                            CLMemory* result);

Status CreateTensor(const CLContext& context, const CLDevice& device,
                    const BHWC& shape, const TensorDescriptor& descriptor,
                    Tensor* result);

Status CreateSharedTensor(const CLContext& context, const CLDevice& device,
                          cl_mem memory, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result);
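// A minimal usage sketch (an illustration, not part of this API: it assumes a
// valid CLContext, CLDevice and CLCommandQueue already exist and that
// RETURN_IF_ERROR from common/status.h is available):
//
//   TensorDescriptor desc;
//   desc.data_type = DataType::FLOAT32;
//   desc.storage_type = TensorStorageType::TEXTURE_2D;
//
//   Tensor tensor;
//   RETURN_IF_ERROR(
//       CreateTensor(context, device, BHWC(1, 32, 32, 16), desc, &tensor));
//   RETURN_IF_ERROR(tensor.WriteData(queue, src_cpu));  // src_cpu: TensorFloat32
//   // ... run kernels that consume tensor.GetMemoryPtr() ...
//   RETURN_IF_ERROR(tensor.ReadData(queue, &dst_cpu));  // dst_cpu: TensorFloat32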
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_