/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_

#include <cstdint>
#include <memory>

#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_memory.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
namespace tflite {
namespace gpu {
namespace cl {

class Tensor {
 public:
  Tensor()
      : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
  Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
         const TensorDescriptor& descriptor);
  Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
         const BHWC& shape, const TensorDescriptor& descriptor);

  // Move only
  Tensor(Tensor&& tensor);
  Tensor& operator=(Tensor&& tensor);
  Tensor(const Tensor&) = delete;
  Tensor& operator=(const Tensor&) = delete;

  virtual ~Tensor() { Release(); }

  int Width() const { return shape_.w; }
  int Height() const { return shape_.h; }
  int Channels() const { return shape_.c; }
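  // Number of 4-channel slices: channels are packed four per element of the
  // underlying storage, so Depth() == ceil(Channels() / 4).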
  int Depth() const { return IntegralDivideRoundUp(shape_.c, 4); }
  int Batch() const { return shape_.b; }
  int4 GetSizeWithDepth() const {
    return int4(shape_.w, shape_.h, shape_.c, Depth());
  }
  // returns int4(width * batch, height, depth, batch)
  int4 GetWBatchedHDB() const {
    return int4(shape_.w * shape_.b, shape_.h, Depth(), shape_.b);
  }
  int4 GetWHDB() const { return int4(shape_.w, shape_.h, Depth(), shape_.b); }
  enum DataType DataType() const { return descriptor_.data_type; }
  TensorStorageType StorageType() const { return descriptor_.storage_type; }

  // for profiling and memory statistics
  uint64_t GetMemorySizeInBytes() const;

  cl_mem GetMemoryPtr() const;

  // This function returns buffer memory ptr for IMAGE_BUFFER instead of image
  // memory ptr.
  cl_mem GetMemoryPtrForWriting() const;
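  // Host <-> device copies. The BHWC data in |src|/|dst| is converted to/from
  // the tensor's internal storage layout (see DataFromBHWC/DataToBHWC below).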
  Status WriteData(CLCommandQueue* queue, const TensorFloat32& src);
  Status ReadData(CLCommandQueue* queue, TensorFloat32* dst) const;

 private:
  Status IsValid(const BHWC& shape) const;

  int GetChannelsAlignment() const;
  int GetAlignedChannels() const;

  Status WriteDataBHWC(absl::Span<const float> in, CLCommandQueue* queue);
  Status ReadDataBHWC(absl::Span<float> out, CLCommandQueue* queue) const;

  template <typename T>
  void DataFromBHWC(absl::Span<const float> src, absl::Span<T> dst) const;
  template <typename T>
  void DataToBHWC(absl::Span<const T> src, absl::Span<float> dst) const;

  // TODO(sorokin) might be bad performance
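  // Maps a (batch, x, y, depth-slice, channel-within-slice) coordinate to a
  // flat element index in the tensor's storage layout. For example, with a
  // BHWC(1, 4, 4, 16) shape and BUFFER storage (DHWBC4 layout), the element
  // at b=0, x=1, y=2, d=3, sub_d=0 lives at
  // (((3 * 4 + 2) * 4 + 1) * 1 + 0) * 4 + 0 = 228.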
  int GetLinearIndex(int b, int x, int y, int d, int sub_d) const {
    switch (descriptor_.storage_type) {
      case TensorStorageType::BUFFER:
      case TensorStorageType::IMAGE_BUFFER:
      case TensorStorageType::TEXTURE_ARRAY:
        return (((d * shape_.h + y) * shape_.w + x) * shape_.b + b) * 4 +
               sub_d;  // DHWBC4
      case TensorStorageType::TEXTURE_2D:
        return (((y * Depth() + d) * shape_.w + x) * shape_.b + b) * 4 +
               sub_d;  // HDWBC4
      case TensorStorageType::SINGLE_TEXTURE_2D:
        return ((y * shape_.w + x) * shape_.b + b) * shape_.c + sub_d;  // HWBC
      case TensorStorageType::UNKNOWN:
        return -1;
    }
  }
  int3 GetFullTensorRegion() const;
  void Release();

  cl_mem memory_;
  cl_mem image_buffer_memory_;  // for TensorStorageType::IMAGE_BUFFER only
  bool memory_owner_;
  BHWC shape_;
  TensorDescriptor descriptor_;
};

using TensorPtr = std::shared_ptr<Tensor>;
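// Helpers for creating tensors. CanCreateTensorWithShape reports whether a
// tensor with the given shape and descriptor can be allocated on the device;
// AllocateTensorMemory allocates the raw cl_mem; CreateTensor allocates and
// owns the memory; CreateSharedTensor wraps an externally owned cl_mem.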
bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWC& shape,
                              const TensorDescriptor& descriptor);

Status AllocateTensorMemory(const CLContext& context, const CLDevice& device,
                            const BHWC& shape,
                            const TensorDescriptor& descriptor,
                            CLMemory* result);

Status CreateTensor(const CLContext& context, const CLDevice& device,
                    const BHWC& shape, const TensorDescriptor& descriptor,
                    Tensor* result);

Status CreateSharedTensor(const CLContext& context, const CLDevice& device,
                          cl_mem memory, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result);
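// A minimal usage sketch (an illustration, not part of this API: it assumes a
// valid CLContext, CLDevice and CLCommandQueue already exist and that
// RETURN_IF_ERROR from common/status.h is available):
//
//   TensorDescriptor desc;
//   desc.data_type = DataType::FLOAT32;
//   desc.storage_type = TensorStorageType::TEXTURE_2D;
//
//   Tensor tensor;
//   RETURN_IF_ERROR(
//       CreateTensor(context, device, BHWC(1, 32, 32, 16), desc, &tensor));
//   RETURN_IF_ERROR(tensor.WriteData(queue, src_cpu));  // src_cpu: TensorFloat32
//   // ... run kernels that consume tensor.GetMemoryPtr() ...
//   RETURN_IF_ERROR(tensor.ReadData(queue, &dst_cpu));  // dst_cpu: TensorFloat32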
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_