/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/arguments.h"
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/compiler_options.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
#include "tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
namespace tflite {
namespace gpu {
namespace cl {
class ClOperation;
}
namespace metal {
class ComputeTask;
struct ComputeTaskDescriptor;
}
// kCustom: default value
// GPUOperation::GetGridSize must be overridden
// kWBToX_HDToY_SToZ:
// grid_x = dst_[0]->Width() * dst_[0]->Batch();
// grid_y = dst_[0]->Height() * dst_[0]->Depth();
// grid_z = dst_[0]->Slices();
// kWBToX_HDToY_ZIs1:
// grid_x = dst_[0]->Width() * dst_[0]->Batch();
// grid_y = dst_[0]->Height() * dst_[0]->Depth();
// grid_z = 1;
// kWBToX_HToY_DToZ:
// grid_x = dst_[0]->Width() * dst_[0]->Batch();
// grid_y = dst_[0]->Height();
// grid_z = dst_[0]->Depth();
// kBToX_YIs1_ZIs1:
// grid_x = dst_[0]->Batch();
// grid_y = 1;
// grid_z = 1;
enum class TensorToGrid {
kCustom,
kWBToX_HDToY_SToZ,
kWBToX_HDToY_ZIs1,
kWBToX_HToY_DToZ,
kBToX_YIs1_ZIs1
};
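// Illustrative sketch (not part of this header): a derived operation can pick
// one of the predefined mappings by setting the public tensor_to_grid_ field;
// the SimpleOp name below is hypothetical.
//
//   class SimpleOp : public GPUOperation {
//    public:
//     explicit SimpleOp(const OperationDef& def) : GPUOperation(def) {
//       // Grid is derived automatically from dst_[0]:
//       // x = W * B, y = H * D, z = Slices.
//       tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
//     }
//   };
//
// With the default kCustom, GetGridSize() must be overridden instead (see the
// sketch after the GPUOperation class).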
struct OperationDef {
CalculationsPrecision precision;
std::vector<TensorDescriptor> src_tensors;
std::vector<TensorDescriptor> dst_tensors;
// returns FLOAT32 for F32 precision and FLOAT16 for F16 precision
DataType GetDataType() const;
// Primary means the first src tensor, because the first tensor usually
// defines the structure of the kernel and the types of all other
// resources (biases, etc.).
DataType GetPrimaryDataType() const;
TensorStorageType GetPrimaryStorageType() const;
bool IsBatchSupported() const;
};
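// Informal usage sketch (values are hypothetical; assumes the
// TensorDescriptor(data_type, storage_type, layout) constructor from
// tensor_desc.h):
//
//   OperationDef def;
//   def.precision = CalculationsPrecision::F16;
//   def.src_tensors.push_back(TensorDescriptor(
//       DataType::FLOAT16, TensorStorageType::TEXTURE_2D, Layout::HWC));
//   def.dst_tensors.push_back(TensorDescriptor(
//       DataType::FLOAT16, TensorStorageType::TEXTURE_2D, Layout::HWC));
//   // def.GetDataType()           -> DataType::FLOAT16
//   // def.GetPrimaryDataType()    -> data type of src_tensors[0]
//   // def.GetPrimaryStorageType() -> storage type of src_tensors[0]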
// GPUOperation represents an implementation of a neural network operation on
// the GPU. A GPUOperation can contain other GPU operations when the
// elementwise_ flag is set. When a GPUOperation contains other GPU ops, this
// GPUOperation replaces a sequence of operations Op + op0 + op1 + ...
// Because of this ability, the usage scenario is as follows:
//   Create an instance of GPUOperation.
//   Create all instances of GPUOperations that will (probably) be attached
//   to it. Attach all of those GPUOperations to it. Call
//   GPUOperation.Compile(). Do not call Compile() on an attached
//   GPUOperation; it is useless (and may be an error).
// A sketch of this scenario follows below.
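// Sketch of that scenario (conv_op/relu_op and their factory functions are
// hypothetical; Compile() lives in the backend-specific wrappers such as
// cl::ClOperation or metal::ComputeTask, not in this header):
//
//   GPUOperation conv_op = CreateConvolution(...);  // main op
//   GPUOperation relu_op = CreateReLU(...);         // elementwise_ == true
//   absl::Status status = conv_op.AddOperation(&relu_op);  // fuse ReLU
//   // ...then compile only conv_op through the backend wrapper.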
class GPUOperation {
public:
GPUOperation() = default;
explicit GPUOperation(const OperationDef& definition);
virtual ~GPUOperation() = default;
// Move only
GPUOperation(GPUOperation&& operation);
GPUOperation& operator=(GPUOperation&& operation);
GPUOperation(const GPUOperation&) = delete;
GPUOperation& operator=(const GPUOperation&) = delete;
absl::Status AddOperation(GPUOperation* operation);
void SetSrc(GpuSpatialTensor* ptr, int index = 0);
void SetDst(GpuSpatialTensor* ptr, int index = 0);
virtual void GetPossibleKernelWorkGroups(
TuningType tuning_type, const GpuInfo& gpu_info,
const KernelInfo& kernel_info, std::vector<int3>* work_groups) const;
void AssembleCode(const GpuInfo& gpu_info);
virtual absl::Status PostCompileCheck(const GpuInfo& gpu_info,
const KernelInfo& kernel_info) {
return absl::OkStatus();
}
const OperationDef& GetDefinition() const { return definition_; }
void AddSrcTensor(const std::string& tensor_name,
const TensorDescriptor& desc);
void AddSrcBuffer(const std::string& buffer_name,
const BufferDescriptor& desc);
void AddSrcTexture2D(const std::string& texture_name,
const Texture2DDescriptor& desc);
void AddDstTensor(const std::string& tensor_name,
const TensorDescriptor& desc);
bool IsLinkable() const { return elementwise_ && linkable_; }
// for linking
void AddUniquePostfix(const std::string& unique_postfix);
Arguments args_;
std::string code_;
int3 work_group_size_ = int3(8, 4, 1);
std::vector<CompilerOptions> compiler_options_;
// not applicable to elementwise
TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
bool elementwise_ = false;
// applicable only with elementwise_ = true;
bool linkable_ = true; // by default every elementwise op is linkable
// applicable only with elementwise_ = true;
bool check_src_channels_size_ = false;
protected:
friend class cl::ClOperation;
friend class metal::ComputeTask;
friend struct metal::ComputeTaskDescriptor;
friend flatbuffers::Offset<tflite::gpu::data::GPUOperation> Encode(
const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
friend absl::Status Decode(const tflite::gpu::data::GPUOperation* fb_op,
GPUOperation* op);
virtual absl::Status BindArguments(ArgumentsBinder* args) {
return absl::OkStatus();
}
virtual int3 GetGridSize() const;
// Defines operation calculation precision and format of src/dst tensors.
OperationDef definition_;
std::vector<GpuSpatialTensor*> src_;
std::vector<GpuSpatialTensor*> dst_;
int grid_dimension_ = 3; // can be 1, 2 or 3
int3 work_group_launch_order_ = int3(0, 1, 2);
int3 grid_size_ = int3(0, 0, 0);
std::vector<std::string> src_tensors_names_;
std::vector<std::string> dst_tensors_names_;
private:
int3 work_groups_count_ = int3(0, 0, 0);
int linkable_count_ = 0;
std::string elementwise_code_; // temporary, used during op construction
};
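// Minimal sketch of a custom-grid operation (illustrative only; MyOp and its
// "scale" uniform are hypothetical and not part of this header):
//
//   class MyOp : public GPUOperation {
//    public:
//     explicit MyOp(const OperationDef& def) : GPUOperation(def) {
//       // tensor_to_grid_ stays kCustom, so GetGridSize() must be provided.
//       args_.AddFloat("scale", 1.0f);
//       code_ = "...";  // kernel source that references the arguments
//     }
//     int3 GetGridSize() const override {
//       return int3(dst_[0]->Width() * dst_[0]->Batch(), dst_[0]->Height(),
//                   dst_[0]->Slices());
//     }
//     absl::Status BindArguments(ArgumentsBinder* args) override {
//       return args->SetFloat("scale", 2.0f);
//     }
//   };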
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_