blob: be45cdf4b3a909b80e9f8165b2b37db7debc057e [file] [log] [blame]
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_COMPUTE_TASK_DESCRIPTOR_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_COMPUTE_TASK_DESCRIPTOR_H_
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
namespace tflite {
namespace gpu {
namespace metal {
// Callback that computes the shape of an output buffer. It receives a map
// from value id to shape (BHWC) and returns the resulting BHWC shape.
using OutputDimensions =
std::function<BHWC(const std::map<ValueId, BHWC>& buffers)>;
// Callback that produces the contents of a uniform buffer as raw bytes, given
// a map from value id to shape (BHWC).
using UniformsFunction =
std::function<std::vector<uint8_t>(const std::map<ValueId, BHWC>& buffers)>;
// Callback that computes the parameters used to dispatch a GPU compute task,
// given a map from value id to shape (BHWC). It returns a pair of uint3.
// NOTE(review): the meaning of each pair element (presumably threads per
// group and number of groups) is not visible in this header -- confirm
// against the code that consumes it.
using DispatchParamsFunction = std::function<std::pair<uint3, uint3>(
const std::map<ValueId, BHWC>& buffers)>;
// A compute task descriptor holds either a linkable shader function or the
// source of a complete shader, to which linkable functions may or may not be
// attached. An operation can produce one or more descriptors, and the graph
// compiler uses descriptors as building blocks. All required data, such as
// immutable operation parameters (weights etc.), is attached to the
// descriptor.
struct ComputeTaskDescriptor {
// Describes one buffer the task reads from.
struct InputBufferDescriptor {
// Id of the value associated with this buffer.
ValueId id;
// The declaration is inserted into the compute function arguments list.
// Example for non-linkable task: "device FLT4* const input_buffer"
// Example for linkable: "device FLT4* const"
std::string declaration;
};
// Describes the buffer the task writes to.
struct OutputBufferDescriptor {
// Id of the value associated with this buffer.
ValueId id;
// The declaration is inserted into the compute function arguments list.
// Example for non-linkable task: "device FLT4* output_buffer"
// Example for linkable: "device FLT4*"
std::string declaration;
// Multiple outputs are allowed from a linkable operation, so after fusion
// each buffer's dimensions are calculated separately, by the function that
// belongs to the operation which produced the buffer.
OutputDimensions dimensions_function;
// Fusion absorbs intermediate tensors. Keep these ids so that output
// dimensions can still be stored properly.
std::vector<ValueId> alias;
};
// Describes a buffer whose contents are supplied with the descriptor itself.
struct ImmutableBufferDescriptor {
// NOTE(review): presumably inserted into the compute function arguments
// list like the other declarations in this struct -- confirm.
std::string declaration;
// Raw bytes of the buffer contents.
std::vector<uint8_t> data;
};
// Uniforms are recalculated at any setInputDimensions call.
struct UniformBufferDescriptor {
// The declaration is inserted into the compute function arguments list.
// Example: "constant uint4& some_uniforms"
std::string declaration;
// This function re-calculates uniforms for specific input dimensions.
UniformsFunction data_function;
};
// Unique ID to match the graph compilation errors.
int id;
// NOTE(review): presumably true when shader_source is a linkable function and
// false when it is a complete shader -- confirm against the users of this
// flag.
bool is_linkable;
// Either a linkable function or the source of a full shader. The text contains
// "$N" placeholders that are substituted later; a full shader uses three of
// them ($0, $1 and $2).
// NOTE(review): judging by their positions in the example below, $0 looks
// like the spot for linkable function definitions, $1 for extra argument
// declarations and $2 for calls to linked functions -- confirm.
//
// Example of a linkable function:
//   FLT4 linkable$0(FLT4 value, int linear_index) { return value; }
//
// Example of a non-linkable function (full shader):
//   #include <metal_stdlib>
//   using namespace metal;
//   $0
//   kernel void ComputeFunction(
//       $1
//       uint3 gid[[thread_position_in_grid]]) {
//     if (int(gid.x) >= size.x || int(gid.y) >= size.y) {
//       return;
//     }
//     const int linear_index = (gid.z * size.y + gid.y) * size.x + gid.x;
//     FLT4 value = input_buffer[linear_index] + 1.0f;
//     $2
//     output_buffer[linear_index] = value;
//   }
std::string shader_source;
// Buffers the task reads from.
std::vector<InputBufferDescriptor> input_buffers;
// A single per-operation output is supported now.
OutputBufferDescriptor output_buffer;
// Buffers whose data is stored in the descriptor.
std::vector<ImmutableBufferDescriptor> immutable_buffers;
// Buffers whose contents are recalculated through their data_function.
std::vector<UniformBufferDescriptor> uniform_buffers;
// Dynamic resizing of the input tensor is supported. This user-defined
// function calculates the new parameters for dispatching the GPU compute
// task. A leading unlinkable task must provide it.
DispatchParamsFunction resize_function;
};
// Shared-ownership pointer to a ComputeTaskDescriptor.
using ComputeTaskDescriptorPtr = std::shared_ptr<ComputeTaskDescriptor>;
/// Copies the in-memory representation of a vector's elements into a byte
/// buffer.
///
/// The returned buffer holds input_vector.size() * sizeof(T) bytes, laid out
/// exactly as the elements are stored in input_vector. An empty input yields
/// an empty buffer.
template <typename T>
std::vector<uint8_t> GetByteBuffer(const std::vector<T>& input_vector) {
  // View the element storage as a half-open range of bytes and copy it.
  const uint8_t* const first_byte =
      reinterpret_cast<const uint8_t*>(input_vector.data());
  const uint8_t* const past_last_byte =
      first_byte + input_vector.size() * sizeof(T);
  return std::vector<uint8_t>(first_byte, past_last_byte);
}
/// Converts float values to the destination type (if needed) and stores the
/// result as an array of bytes.
///
/// input_vector holds the source values and destination_type selects the
/// precision they are converted to.
/// NOTE(review): the definition is not in this header; which precisions
/// trigger a conversion, and the resulting element size, should be confirmed
/// against the implementation.
std::vector<uint8_t> GetByteBufferConverted(
const std::vector<float>& input_vector,
RuntimeOptions::Precision destination_type);
/// Resizes, converts float values to the destination type (if needed) and
/// stores the result as an array of bytes.
///
/// input_vector holds the source values, destination_type selects the
/// precision they are converted to, and elements_count is the element count
/// used for the resize.
/// NOTE(review): the definition is not in this header; how the data is padded
/// or truncated to elements_count should be confirmed against the
/// implementation.
std::vector<uint8_t> GetByteBufferConvertedResized(
const std::vector<float>& input_vector,
RuntimeOptions::Precision destination_type, size_t elements_count);
} // namespace metal
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_COMPUTE_TASK_DESCRIPTOR_H_