// tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h - platform/external/tensorflow - Git at Google

 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_GPU_OPERATION_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_GPU_OPERATION_H_

 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>

 #include "tensorflow/lite/delegates/gpu/cl/arguments.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
 #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"

 namespace tflite {
 namespace gpu {
 namespace cl {

 // Bundle of pointers to the OpenCL objects handed to
 // GPUOperation::Compile(). The struct has no destructor, so it does not
 // manage the lifetime of anything it points to.
 //
 // Every pointer defaults to nullptr so that a default-initialized
 // CreationContext never carries indeterminate pointer values.
 struct CreationContext {
   const CLDevice* device = nullptr;
   CLContext* context = nullptr;
   CLCommandQueue* queue = nullptr;
   ProgramCache* cache = nullptr;
 };

 // Describes an operation: the calculation precision plus the descriptors of
 // its source and destination tensors.
 struct OperationDef {
   CalculationsPrecision precision;
   // Descriptors of the source tensors.
   std::vector<TensorDescriptor> src_tensors;
   // Descriptors of the destination tensors.
   std::vector<TensorDescriptor> dst_tensors;

   // Returns FLOAT32 for F32 precision and FLOAT16 for F16 precision.
   DataType GetDataType() const;
   // "Primary" means the first src tensor, because the first tensor usually
   // defines the structure of the kernel, the types of all other resources
   // (e.g. biases), etc.
   DataType GetPrimaryDataType() const;
   // NOTE(review): presumably the storage type of that same first src tensor;
   // the definition is not visible here -- confirm.
   TensorStorageType GetPrimaryStorageType() const;
   // NOTE(review): presumably true when every src and dst tensor uses
   // `storage_type` -- confirm against the definition.
   bool HasAllTensorsOfType(TensorStorageType storage_type) const;
   // NOTE(review): the exact criterion lives in the implementation file and
   // cannot be derived from this header.
   bool IsBatchSupported() const;
 };

 // Forward declaration: GPUOperation (below) refers to ElementwiseOperation
 // pointers before that class is defined.
 class ElementwiseOperation;

 // Declared here, defined elsewhere. GPUOperation::UpdateParams() calls it
 // with the operation's linked ops and a pointer to its Arguments object.
 // NOTE(review): presumably forwards to ElementwiseOperation::SetArgs() for
 // each entry of `linked_ops` -- confirm against the definition.
 absl::Status SetArguments(const std::vector<ElementwiseOperation*>& linked_ops,
                           Arguments* args);

 // GPUOperation is one GPU implementation of a neural-network operation.
 //
 // ElementwiseOperations can be attached to it with AddOperation(); the
 // GPUOperation then replaces the sequence Op + el_op0 + el_op1 + ...
 // The attached operations still hold necessary data and must stay alive.
 //
 // Usage scenario:
 //   1. Create the GPUOperation instance.
 //   2. Create all ElementwiseOperation instances that will (probably) be
 //      attached.
 //   3. Attach them to the GPUOperation.
 //   4. Call GPUOperation::Compile(). Do not call Compile() on an attached
 //      ElementwiseOperation: it is useless and may be an error.
 class GPUOperation {
  public:
   GPUOperation() = default;
   explicit GPUOperation(const OperationDef& definition);
   virtual ~GPUOperation() = default;

   // Not copyable.
   GPUOperation(const GPUOperation&) = delete;
   GPUOperation& operator=(const GPUOperation&) = delete;
   // Movable.
   GPUOperation(GPUOperation&& operation);
   GPUOperation& operator=(GPUOperation&& operation);

   // Read-only access to definition_.
   const OperationDef& GetDefinition() const { return definition_; }

   // Attaches an elementwise operation (declared here, defined elsewhere).
   void AddOperation(ElementwiseOperation* operation);

   // Set the source / destination tensor for slot `index` (defined elsewhere).
   void SetSrc(Tensor* ptr, int index = 0);
   void SetDst(Tensor* ptr, int index = 0);

   // Should be called whenever inputs/outputs change: runs BindArguments(),
   // hands the linked operations and args_ to SetArguments(), then refreshes
   // grid_size_ from GetGridSize(). Returns early if either step reports an
   // error.
   absl::Status UpdateParams() {
     RETURN_IF_ERROR(BindArguments());
     RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
     grid_size_ = GetGridSize();
     return absl::OkStatus();
   }

   // Binds args_ to the kernel and dispatches the kernel on `queue` with the
   // current grid and work-group sizes. `queue` is dereferenced without a null
   // check.
   absl::Status AddToQueue(CLCommandQueue* queue) {
     RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
     return queue->DispatchImplicit(kernel_, grid_size_, work_group_size_);
   }

   // Binds args_ to the kernel, then calls GetBestWorkGroup() with the current
   // grid size and a pointer to work_group_size_.
   virtual absl::Status Tune(const TuningParameters& params) {
     RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
     return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_);
   }

   // Default implementation does nothing and returns OK.
   virtual absl::Status Compile(const CreationContext& /*creation_context*/) {
     return absl::OkStatus();
   }

  protected:
   // Implemented by subclasses; both are invoked from UpdateParams().
   virtual absl::Status BindArguments() = 0;
   virtual int3 GetGridSize() const = 0;

   // Calculation precision and format of the src/dst tensors.
   OperationDef definition_;
   std::vector<Tensor*> src_;
   std::vector<Tensor*> dst_;
   Arguments args_;
   CLKernel kernel_;
   int3 work_group_size_ = int3(8, 4, 1);
   int3 grid_size_ = int3(0, 0, 0);
   std::vector<ElementwiseOperation*> linked_operations_;
 };

 // ElementwiseOperation can be fused(linked) to another operation.
 // field linked_ indicate about this
 // link_index_ used mostly for generating of correct names for
 //   linked code variables
 // link_index_ is number of operation in sequence of linked operations
 // and should be unique in this sequence
 // link_index_ = 0 is equivalent that operation not linked.
 class ElementwiseOperation : public GPUOperation {
  public:
   ElementwiseOperation() {}
   explicit ElementwiseOperation(const OperationDef& definition)
       : GPUOperation(definition) {}

   virtual ~ElementwiseOperation() {}

   absl::Status Compile(const CreationContext& creation_context) override;
   absl::Status BindArguments() override;
   int3 GetGridSize() const override;

   // Move only
   ElementwiseOperation(ElementwiseOperation&& operation);
   ElementwiseOperation& operator=(ElementwiseOperation&& operation);
   ElementwiseOperation(const ElementwiseOperation&) = delete;
   ElementwiseOperation& operator=(const ElementwiseOperation&) = delete;

   virtual absl::Status SetArgs(const std::string& unique_postfix,
                                Arguments* args) {
     return absl::OkStatus();
   }

   Arguments&& MoveArgs() { return std::move(args_); }
   std::string GetCode() const { return code_; }

   // ovveride to return false if for any reason operation can not be linked.
   virtual bool IsLinkable() const { return true; }

  protected:
   bool check_src_channels_size_ = false;
   std::string code_;
 };

 // Declared here, defined elsewhere.
 // NOTE(review): judging by the signature, combines the arguments and code of
 // `linked_ops` into `*merged_args` and `*merged_code` (see
 // ElementwiseOperation::MoveArgs() / GetCode()) -- confirm against the
 // definition.
 absl::Status MergeOperations(
     const std::vector<ElementwiseOperation*>& linked_ops,
     Arguments* merged_args, std::string* merged_code);

 }  // namespace cl
 }  // namespace gpu
 }  // namespace tflite

 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_GPU_OPERATION_H_
	/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_GPU_OPERATION_H_
	#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_GPU_OPERATION_H_

	#include <memory>
	#include <string>
	#include <vector>

	#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
	#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
	#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
	#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
	#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
	#include "tensorflow/lite/delegates/gpu/cl/precision.h"
	#include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
	#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
	#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
	#include "tensorflow/lite/delegates/gpu/common/status.h"
	#include "tensorflow/lite/delegates/gpu/common/types.h"

	namespace tflite {
	namespace gpu {
	namespace cl {

	// Bundle of pointers to the OpenCL objects handed to
	// GPUOperation::Compile(). The struct has no destructor, so it does not
	// manage the lifetime of anything it points to.
	//
	// Every pointer defaults to nullptr so that a default-initialized
	// CreationContext never carries indeterminate pointer values.
	struct CreationContext {
	  const CLDevice* device = nullptr;
	  CLContext* context = nullptr;
	  CLCommandQueue* queue = nullptr;
	  ProgramCache* cache = nullptr;
	};

	// Describes an operation: the calculation precision plus the descriptors of
	// its source and destination tensors.
	struct OperationDef {
	CalculationsPrecision precision;
	// Descriptors of the source tensors.
	std::vector<TensorDescriptor> src_tensors;
	// Descriptors of the destination tensors.
	std::vector<TensorDescriptor> dst_tensors;

	// Returns FLOAT32 for F32 precision and FLOAT16 for F16 precision.
	DataType GetDataType() const;
	// "Primary" means the first src tensor, because the first tensor usually
	// defines the structure of the kernel, the types of all other resources
	// (e.g. biases), etc.
	DataType GetPrimaryDataType() const;
	// NOTE(review): presumably the storage type of that same first src tensor;
	// the definition is not visible here -- confirm.
	TensorStorageType GetPrimaryStorageType() const;
	// NOTE(review): presumably true when every src and dst tensor uses
	// `storage_type` -- confirm against the definition.
	bool HasAllTensorsOfType(TensorStorageType storage_type) const;
	// NOTE(review): the exact criterion lives in the implementation file and
	// cannot be derived from this header.
	bool IsBatchSupported() const;
	};

	// Forward declaration: GPUOperation (below) refers to ElementwiseOperation
	// pointers before that class is defined.
	class ElementwiseOperation;

	// Declared here, defined elsewhere. GPUOperation::UpdateParams() calls it
	// with the operation's linked ops and a pointer to its Arguments object.
	// NOTE(review): presumably forwards to ElementwiseOperation::SetArgs() for
	// each entry of `linked_ops` -- confirm against the definition.
	absl::Status SetArguments(const std::vector<ElementwiseOperation*>& linked_ops,
	Arguments* args);

	// GPUOperation is one GPU implementation of a neural-network operation.
	//
	// ElementwiseOperations can be attached to it with AddOperation(); the
	// GPUOperation then replaces the sequence Op + el_op0 + el_op1 + ...
	// The attached operations still hold necessary data and must stay alive.
	//
	// Usage scenario:
	//   1. Create the GPUOperation instance.
	//   2. Create all ElementwiseOperation instances that will (probably) be
	//      attached.
	//   3. Attach them to the GPUOperation.
	//   4. Call GPUOperation::Compile(). Do not call Compile() on an attached
	//      ElementwiseOperation: it is useless and may be an error.
	class GPUOperation {
	 public:
	  GPUOperation() = default;
	  explicit GPUOperation(const OperationDef& definition);
	  virtual ~GPUOperation() = default;

	  // Not copyable.
	  GPUOperation(const GPUOperation&) = delete;
	  GPUOperation& operator=(const GPUOperation&) = delete;
	  // Movable.
	  GPUOperation(GPUOperation&& operation);
	  GPUOperation& operator=(GPUOperation&& operation);

	  // Read-only access to definition_.
	  const OperationDef& GetDefinition() const { return definition_; }

	  // Attaches an elementwise operation (declared here, defined elsewhere).
	  void AddOperation(ElementwiseOperation* operation);

	  // Set the source / destination tensor for slot `index` (defined elsewhere).
	  void SetSrc(Tensor* ptr, int index = 0);
	  void SetDst(Tensor* ptr, int index = 0);

	  // Should be called whenever inputs/outputs change: runs BindArguments(),
	  // hands the linked operations and args_ to SetArguments(), then refreshes
	  // grid_size_ from GetGridSize(). Returns early if either step reports an
	  // error.
	  absl::Status UpdateParams() {
	    RETURN_IF_ERROR(BindArguments());
	    RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
	    grid_size_ = GetGridSize();
	    return absl::OkStatus();
	  }

	  // Binds args_ to the kernel and dispatches the kernel on `queue` with the
	  // current grid and work-group sizes. `queue` is dereferenced without a null
	  // check.
	  absl::Status AddToQueue(CLCommandQueue* queue) {
	    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
	    return queue->DispatchImplicit(kernel_, grid_size_, work_group_size_);
	  }

	  // Binds args_ to the kernel, then calls GetBestWorkGroup() with the current
	  // grid size and a pointer to work_group_size_.
	  virtual absl::Status Tune(const TuningParameters& params) {
	    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
	    return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_);
	  }

	  // Default implementation does nothing and returns OK.
	  virtual absl::Status Compile(const CreationContext& /*creation_context*/) {
	    return absl::OkStatus();
	  }

	 protected:
	  // Implemented by subclasses; both are invoked from UpdateParams().
	  virtual absl::Status BindArguments() = 0;
	  virtual int3 GetGridSize() const = 0;

	  // Calculation precision and format of the src/dst tensors.
	  OperationDef definition_;
	  std::vector<Tensor*> src_;
	  std::vector<Tensor*> dst_;
	  Arguments args_;
	  CLKernel kernel_;
	  int3 work_group_size_ = int3(8, 4, 1);
	  int3 grid_size_ = int3(0, 0, 0);
	  std::vector<ElementwiseOperation*> linked_operations_;
	};

	// ElementwiseOperation can be fused(linked) to another operation.
	// field linked_ indicate about this
	// link_index_ used mostly for generating of correct names for
	// linked code variables
	// link_index_ is number of operation in sequence of linked operations
	// and should be unique in this sequence
	// link_index_ = 0 is equivalent that operation not linked.
	class ElementwiseOperation : public GPUOperation {
	public:
	ElementwiseOperation() {}
	explicit ElementwiseOperation(const OperationDef& definition)
	: GPUOperation(definition) {}

	virtual ~ElementwiseOperation() {}

	absl::Status Compile(const CreationContext& creation_context) override;
	absl::Status BindArguments() override;
	int3 GetGridSize() const override;

	// Move only
	ElementwiseOperation(ElementwiseOperation&& operation);
	ElementwiseOperation& operator=(ElementwiseOperation&& operation);
	ElementwiseOperation(const ElementwiseOperation&) = delete;
	ElementwiseOperation& operator=(const ElementwiseOperation&) = delete;

	virtual absl::Status SetArgs(const std::string& unique_postfix,
	Arguments* args) {
	return absl::OkStatus();
	}

	Arguments&& MoveArgs() { return std::move(args_); }
	std::string GetCode() const { return code_; }

	// ovveride to return false if for any reason operation can not be linked.
	virtual bool IsLinkable() const { return true; }

	protected:
	bool check_src_channels_size_ = false;
	std::string code_;
	};

	// Declared here, defined elsewhere.
	// NOTE(review): judging by the signature, combines the arguments and code of
	// `linked_ops` into `*merged_args` and `*merged_code` (see
	// ElementwiseOperation::MoveArgs() / GetCode()) -- confirm against the
	// definition.
	absl::Status MergeOperations(
	const std::vector<ElementwiseOperation*>& linked_ops,
	Arguments* merged_args, std::string* merged_code);

	} // namespace cl
	} // namespace gpu
	} // namespace tflite

	#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_GPU_OPERATION_H_