blob: 06d78beef05d8bbd00d151db1f971d7f856c3eb7 [file] [log] [blame]
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h"
#include <vector>
#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/add.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/concat_xy.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/concat_z.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/lstm.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/max_unpooling.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/padding.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/pooling.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/prelu.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/quantize_and_dequantize.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/reduce.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/relu.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/reshape.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/reshapex4.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/resize.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/softmax.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/softmax1x1.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/space_to_depth.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/strided_slice.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/transpose.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/default_selector.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"
namespace tflite {
namespace gpu {
namespace metal {
namespace {
// Creates the depthwise-convolution operation for `attr`.
// The 3x3 specializations are probed first, in order (stride 1x1, then
// stride 2), through their Check*Support predicates. When neither accepts
// `attr`, the generic DepthWiseConvolution is created instead.
std::unique_ptr<GPUOperation> SelectDepthWiseConv(
    const OperationDef& op_def, const DepthwiseConvolution2DAttributes& attr) {
  if (CheckDepthWiseConv3x3Stride1x1Support(attr)) {
    return absl::make_unique<DepthWiseConv3x3Stride1x1>(
        CreateDepthWiseConv3x3Stride1x1(op_def, attr));
  }
  if (CheckDepthWiseConv3x3Stride2Support(attr)) {
    return absl::make_unique<DepthWiseConv3x3Stride2>(
        CreateDepthWiseConv3x3Stride2(op_def, attr));
  }
  return absl::make_unique<DepthWiseConvolution>(
      CreateDepthWiseConvolution(op_def, attr));
}
// Creates the concatenation operation for the axis named in `attr` and
// stores it in `*ptr`.
//   - Axis::CHANNELS uses CreateConcatZ with the per-input `channels`.
//   - Axis::BATCH / DEPTH / HEIGHT / WIDTH use CreateConcatXY.
// Returns OkStatus once `*ptr` has been assigned, or UnimplementedError
// (leaving `*ptr` untouched) for any other axis.
absl::Status SelectConcat(const ConcatAttributes& attr,
                          const std::vector<int>& channels,
                          const OperationDef& op_def, const GpuInfo& gpu_info,
                          std::unique_ptr<GPUOperation>* ptr) {
  switch (attr.axis) {
    case Axis::CHANNELS:
      *ptr = absl::make_unique<GPUOperation>(
          CreateConcatZ(op_def, channels, gpu_info));
      break;
    case Axis::BATCH:
    case Axis::DEPTH:
    case Axis::HEIGHT:
    case Axis::WIDTH:
      *ptr = absl::make_unique<GPUOperation>(CreateConcatXY(op_def, attr));
      break;
    default:
      return absl::UnimplementedError("No concat for this axis.");
  }
  return absl::OkStatus();
}
// Creates the transposed-convolution operation for `attr`.
// The 4x4 specialization is used whenever
// CheckConvolutionTransposed4x4Support(attr) accepts the attributes;
// otherwise the general ConvolutionTransposed is created.
std::unique_ptr<GPUOperation> SelectConvolutionTransposed(
    const OperationDef& op_def, const ConvolutionTransposedAttributes& attr,
    const GpuInfo& gpu_info) {
  if (!CheckConvolutionTransposed4x4Support(attr)) {
    return absl::make_unique<ConvolutionTransposed>(
        CreateConvolutionTransposed(gpu_info, op_def, attr));
  }
  return absl::make_unique<ConvolutionTransposed4x4>(
      CreateConvolutionTransposed4x4(gpu_info, op_def, attr));
}
// Wraps the operation produced by CreateLSTM(op_def, gpu_info) in a
// heap-allocated GPUOperation and returns ownership to the caller.
std::unique_ptr<GPUOperation> SelectLSTM(const OperationDef& op_def,
                                         const GpuInfo& gpu_info) {
  auto lstm_op = CreateLSTM(op_def, gpu_info);
  return absl::make_unique<GPUOperation>(std::move(lstm_op));
}
// Wraps the operation produced by CreateMaxUnpooling(op_def, attr) in a
// heap-allocated GPUOperation and returns ownership to the caller.
std::unique_ptr<GPUOperation> SelectMaxUnpooling(
    const MaxUnpooling2DAttributes& attr, const OperationDef& op_def) {
  auto unpooling_op = CreateMaxUnpooling(op_def, attr);
  return absl::make_unique<GPUOperation>(std::move(unpooling_op));
}
void SelectPadding(const PadAttributes& attr, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
GPUOperation operation = CreatePadding(op_def, attr);
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
}
// Wraps the operation produced by CreatePooling(op_def, attr) in a
// heap-allocated GPUOperation and returns ownership to the caller.
std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes& attr,
                                            const OperationDef& op_def) {
  auto pooling_op = CreatePooling(op_def, attr);
  return absl::make_unique<GPUOperation>(std::move(pooling_op));
}
// Creates a Reduce operation of kind `op_type` over the axes in
// `axis_to_reduce` for a source tensor of shape `src_shape`. All arguments
// are forwarded unchanged to CreateReduce; the result is returned as an
// owning pointer.
std::unique_ptr<GPUOperation> SelectReduce(const std::set<Axis>& axis_to_reduce,
                                           const BHWC& src_shape,
                                           OperationType op_type,
                                           const OperationDef& op_def,
                                           const GpuInfo& gpu_info) {
  auto reduce_op =
      CreateReduce(axis_to_reduce, src_shape, op_type, op_def, gpu_info);
  return absl::make_unique<Reduce>(std::move(reduce_op));
}
// Creates the Resize operation described by `attr` via CreateResize and
// stores it in `*ptr`. This function has no failure path: it always returns
// OkStatus.
absl::Status SelectResize(const Resize2DAttributes& attr,
                          const OperationDef& op_def,
                          std::unique_ptr<GPUOperation>* ptr) {
  *ptr = absl::make_unique<Resize>(CreateResize(op_def, attr));
  return absl::OkStatus();
}
void SelectReshape(int src_channels, int dst_channels,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
if (src_channels % 4 == 0 && dst_channels % 4 == 0) {
GPUOperation operation = CreateReshapex4(op_def);
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
} else {
GPUOperation operation = CreateReshape(op_def);
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
}
}
void SelectSoftmax(const BHWC& shape, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
if (shape.w == 1 && shape.h == 1) {
Softmax1x1 operation = CreateSoftmax1x1(op_def);
*ptr = absl::make_unique<Softmax1x1>(std::move(operation));
} else {
GPUOperation operation = CreateSoftmax(op_def);
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
}
}
void SelectSpaceToDepth(const SpaceToDepthAttributes& attr,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
GPUOperation operation = CreateSpaceToDepth(op_def, attr);
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
}
void SelectStridedSlice(const SliceAttributes& attr, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
StridedSlice operation = CreateStridedSlice(op_def, attr);
*ptr = absl::make_unique<StridedSlice>(std::move(operation));
}
void SelectTranspose(const TransposeAttributes& attr,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
GPUOperation operation = CreateTranspose(op_def, attr);
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
}
// Creates the operation for the Winograd 4x4 -> 36 stage.
// When gpu_info.IsApple() is true the Winograd4x4To36 kernel is used;
// otherwise the Winograd4x4To36TileX6 variant is created.
std::unique_ptr<GPUOperation> SelectWinograd4x4To36(
    const OperationDef& op_def, const Winograd4x4To36Attributes& attr,
    const GpuInfo& gpu_info) {
  if (!gpu_info.IsApple()) {
    return absl::make_unique<Winograd4x4To36TileX6>(
        CreateWinograd4x4To36TileX6(op_def, attr));
  }
  return absl::make_unique<Winograd4x4To36>(
      CreateWinograd4x4To36(op_def, attr));
}
// Creates the operation for the Winograd 36 -> 4x4 stage.
// When gpu_info.IsApple() is true the Winograd36To4x4 kernel is used;
// otherwise the Winograd36To4x4Tile4x1 variant is created.
std::unique_ptr<GPUOperation> SelectWinograd36To4x4(
    const OperationDef& op_def, const Winograd36To4x4Attributes& attr,
    const GpuInfo& gpu_info) {
  if (!gpu_info.IsApple()) {
    return absl::make_unique<Winograd36To4x4Tile4x1>(
        CreateWinograd36To4x4Tile4x1(op_def, attr));
  }
  return absl::make_unique<Winograd36To4x4>(
      CreateWinograd36To4x4(op_def, attr));
}
// Heuristic deciding whether the Winograd 4x4 -> 6x6 path should be used for
// a convolution with weights `attr.weights` producing `dst_shape`.
//
// The output is counted in 4x4 tiles and the weight input/output channel
// counts in groups of 4 ("depth"). Winograd is recommended only when there
// are at least 32 tiles and both depths reach a threshold. The threshold
// starts at 16 and is relaxed for large tile counts, but never below 8.
// With the current constants both relaxation steps land on 8.
//
// `gpu_info` is accepted for interface reasons but is not consulted here.
bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
                                      const GpuInfo& gpu_info,
                                      const BHWC& dst_shape) {
  constexpr int kMinTiles = 32;
  constexpr int kDepthFloor = 8;

  const int tiles_w = DivideRoundUp(dst_shape.w, 4);
  const int tiles_h = DivideRoundUp(dst_shape.h, 4);
  const int tile_count = tiles_w * tiles_h;

  // More tiles amortize the transform cost, so fewer channels are required.
  int depth_threshold = 16;
  if (tile_count >= kMinTiles * 8) {
    depth_threshold = std::max(depth_threshold / 4, kDepthFloor);
  } else if (tile_count >= kMinTiles * 4) {
    depth_threshold = std::max(depth_threshold / 2, kDepthFloor);
  }

  const int in_depth = DivideRoundUp(attr.weights.shape.i, 4);
  const int out_depth = DivideRoundUp(attr.weights.shape.o, 4);

  const bool enough_channels =
      in_depth >= depth_threshold && out_depth >= depth_threshold;
  const bool enough_tiles = tile_count >= kMinTiles;
  return enough_channels && enough_tiles;
}
// Replaces a single convolution with a three-operation Winograd subgraph:
//   operations[0]: 4x4 -> 36 transform  (input tensor -> new tensor -1)
//   operations[1]: Winograd convolution (new tensor -1 -> new tensor -2)
//   operations[2]: 36 -> 4x4 transform  (new tensor -2 -> output tensor)
// The negative ids -1 and -2 are the ids this function assigns to the two
// entries it places in gpu_subgraph->new_tensors.
//
// Returns UnimplementedError, without modifying `gpu_subgraph`, when `attr`
// fails IsSuitableForWinograd4x4To6x6 or IsRecommendedForWinograd4x4To6x6.
// On success `gpu_subgraph->new_tensors` and `gpu_subgraph->operations` are
// overwritten and OkStatus is returned.
//
// Reads inputs[0] and outputs[0]; the caller must supply at least one of
// each.
absl::Status WinogradFromNode(const GpuInfo& gpu_info,
                              const std::vector<Value*>& inputs,
                              const std::vector<Value*>& outputs,
                              const OperationDef& op_def,
                              const BHWC& input_shape, const BHWC& output_shape,
                              const Convolution2DAttributes& attr,
                              GPUOperationsSubgraph* gpu_subgraph) {
  if (!IsSuitableForWinograd4x4To6x6(attr)) {
    return absl::UnimplementedError("No implementation for this case.");
  }
  if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
    return absl::UnimplementedError("Not recommended for this case.");
  }

  // Shapes of the two intermediate tensors: 36 rows, one column per 4x4
  // output tile.
  const int tiles_w = DivideRoundUp(output_shape.w, 4);
  const int tiles_h = DivideRoundUp(output_shape.h, 4);
  const int tile_count = tiles_w * tiles_h;
  const BHWC transformed_src{input_shape.b, 36, tile_count, input_shape.c};
  const BHWC transformed_dst{input_shape.b, 36, tile_count, output_shape.c};

  // Both intermediates reuse the descriptor of the first source tensor.
  const TensorDescriptor& intermediate_desc = op_def.src_tensors[0];
  gpu_subgraph->new_tensors = {{transformed_src, intermediate_desc},
                               {transformed_dst, intermediate_desc}};
  gpu_subgraph->operations.clear();
  gpu_subgraph->operations.resize(3);

  // Every stage reads a tensor described like op_def.src_tensors[0] and
  // inherits the precision of the original op; only the destination
  // descriptor differs between stages.
  const auto make_stage_def = [&op_def](const auto& dst_desc) {
    OperationDef stage_def;
    stage_def.precision = op_def.precision;
    stage_def.src_tensors.push_back(op_def.src_tensors[0]);
    stage_def.dst_tensors.push_back(dst_desc);
    return stage_def;
  };

  // Stage 0: input -> Winograd domain.
  {
    const OperationDef to_wino_def = make_stage_def(op_def.src_tensors[0]);
    Winograd4x4To36Attributes to_wino_attr;
    to_wino_attr.padding = attr.padding;
    auto& to_wino = gpu_subgraph->operations[0];
    to_wino.input_ids = {static_cast<int>(inputs[0]->id)};
    to_wino.output_ids = {-1};
    to_wino.operation =
        SelectWinograd4x4To36(to_wino_def, to_wino_attr, gpu_info);
  }

  // Stage 1: convolution in the Winograd domain.
  {
    const OperationDef wino_conv_def = make_stage_def(op_def.src_tensors[0]);
    auto& wino_conv = gpu_subgraph->operations[1];
    wino_conv.input_ids = {-1};
    wino_conv.output_ids = {-2};
    wino_conv.task_desc = absl::make_unique<ComputeTaskDescriptor>(
        ConvolutionWino4x4To6x6(wino_conv_def, transformed_dst, attr,
                                gpu_info));
  }

  // Stage 2: Winograd domain -> output; the bias is applied here.
  {
    const OperationDef from_wino_def = make_stage_def(op_def.dst_tensors[0]);
    Winograd36To4x4Attributes from_wino_attr;
    from_wino_attr.output_shape = outputs[0]->tensor.shape;
    from_wino_attr.biases = attr.bias;
    auto& from_wino = gpu_subgraph->operations[2];
    from_wino.input_ids = {-2};
    from_wino.output_ids = {static_cast<int>(outputs[0]->id)};
    from_wino.operation =
        SelectWinograd36To4x4(from_wino_def, from_wino_attr, gpu_info);
  }

  return absl::OkStatus();
}
} // namespace
// Converts graph `node` into one or more GPU operations stored in
// `gpu_subgraph`.
//
// The subgraph is first initialised through InitSingleOpSubgraph(inputs,
// outputs, gpu_subgraph), which yields the single GPUOperationWithRefs that
// most cases fill in (either its `operation` or its `task_desc` member).
// The operation type is parsed from node.operation.type and dispatched in the
// switch below.
//
// Returns:
//   - OkStatus when an operation was selected;
//   - UnimplementedError for the explicitly unsupported op types and for
//     input-count / attribute combinations a case does not handle;
//   - whatever SelectDefault returns for op types not listed here.
//
// NOTE(review): most cases any_cast node.operation.attributes to the
// attribute struct matching the op type without a prior type check, and
// several index inputs[0] / outputs[0] without a size check. Presumably the
// caller guarantees both -- confirm against the model builder.
absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
const OperationDef& op_def,
const std::vector<Value*>& inputs,
const std::vector<Value*>& outputs,
const Node& node,
GPUOperationsSubgraph* gpu_subgraph) {
GPUOperationWithRefs* gpu_operation =
InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
auto op_type = OperationTypeFromString(node.operation.type);
switch (op_type) {
// ADD has three forms, tried in this order:
//   1. exactly two runtime inputs whose channel counts match, or whose
//      second input has a single channel -> two-input elementwise op;
//   2. two or more runtime inputs otherwise -> CreateAdd with the channel
//      count of every input;
//   3. one runtime input plus attributes -> elementwise op built from
//      ElementwiseAttributes.
case OperationType::ADD: {
if (inputs.size() == 2 &&
(inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
inputs[1]->tensor.shape.c == 1)) {
GPUOperation operation =
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
gpu_operation->operation =
absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
} else if (inputs.size() >= 2) {
auto output = outputs[0];
std::vector<int> channels(inputs.size());
for (int i = 0; i < inputs.size(); ++i) {
channels[i] = inputs[i]->tensor.shape.c;
}
GPUOperation operation =
CreateAdd(op_def, channels, output->tensor.shape.c);
gpu_operation->operation =
absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
} else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
auto attr =
absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
GPUOperation operation =
CreateElementwise(gpu_info, op_def, op_type, attr);
gpu_operation->operation =
absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
}
return absl::UnimplementedError(absl::StrCat(
"No support of ", node.operation.type, " with this parameters"));
}
// CONCAT: collect the channel count of every input and let SelectConcat
// choose the implementation for the requested axis.
case OperationType::CONCAT: {
auto attr = absl::any_cast<ConcatAttributes>(node.operation.attributes);
std::vector<int> channels(inputs.size());
for (int i = 0; i < inputs.size(); ++i) {
channels[i] = inputs[i]->tensor.shape.c;
}
return SelectConcat(attr, channels, op_def, gpu_info,
&gpu_operation->operation);
}
// CONVOLUTION_2D: requires exactly one runtime input. The Winograd
// decomposition is attempted first; if WinogradFromNode reports any error
// the generic convolution is used instead.
case OperationType::CONVOLUTION_2D: {
if (inputs.size() != 1) {
return absl::UnimplementedError(
"Convolution does not support more than 1 runtime tensor");
}
auto attr =
absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
auto input_shape = inputs[0]->tensor.shape;
auto output_shape = outputs[0]->tensor.shape;
// On success WinogradFromNode has rebuilt gpu_subgraph->operations, so
// return immediately. NOTE(review): `gpu_operation` presumably points into
// that container and would be stale afterwards -- do not use it on this
// path.
if (WinogradFromNode(gpu_info, inputs, outputs, op_def, input_shape,
output_shape, attr, gpu_subgraph)
.ok()) {
return absl::OkStatus();
} else {
auto gpu_op = ConvolutionGeneric(op_def, output_shape, attr, gpu_info);
gpu_operation->task_desc =
absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}
break;
}
// CONVOLUTION_TRANSPOSED: requires exactly one runtime input.
case OperationType::CONVOLUTION_TRANSPOSED:
if (inputs.size() != 1) {
return absl::UnimplementedError(
"Convolution Transposed does not support more than 1 runtime "
"tensor");
}
gpu_operation->operation = SelectConvolutionTransposed(
op_def,
absl::any_cast<ConvolutionTransposedAttributes>(
node.operation.attributes),
gpu_info);
break;
// DEPTHWISE_CONVOLUTION: requires exactly one runtime input.
case OperationType::DEPTHWISE_CONVOLUTION:
if (inputs.size() != 1) {
return absl::UnimplementedError(
"DepthWise Convolution does not support more than 1 runtime "
"tensor");
}
gpu_operation->operation = SelectDepthWiseConv(
op_def, absl::any_cast<DepthwiseConvolution2DAttributes>(
node.operation.attributes));
break;
case OperationType::FULLY_CONNECTED: {
FullyConnected gpu_op = CreateFullyConnected(
gpu_info, op_def,
absl::any_cast<FullyConnectedAttributes>(node.operation.attributes));
gpu_operation->operation =
absl::make_unique<FullyConnected>(std::move(gpu_op));
break;
}
case OperationType::LSTM: {
gpu_operation->operation = SelectLSTM(op_def, gpu_info);
return absl::OkStatus();
}
case OperationType::MAX_UNPOOLING_2D: {
auto attr =
absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes);
gpu_operation->operation = SelectMaxUnpooling(attr, op_def);
return absl::OkStatus();
}
// MEAN is implemented as a reduction over attr.dims of the first input.
case OperationType::MEAN: {
auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
gpu_operation->operation = SelectReduce(
attr.dims, inputs[0]->tensor.shape, op_type, op_def, gpu_info);
return absl::OkStatus();
}
case OperationType::PAD: {
auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
SelectPadding(attr, op_def, &gpu_operation->operation);
return absl::OkStatus();
}
case OperationType::POOLING_2D: {
auto attr =
absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
gpu_operation->operation = SelectPooling(attr, op_def);
return absl::OkStatus();
}
case OperationType::PRELU: {
auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes);
gpu_operation->operation =
absl::make_unique<GPUOperation>(CreatePReLU(gpu_info, op_def, attr));
return absl::OkStatus();
}
// All REDUCE_* variants share one implementation; `op_type` tells
// SelectReduce which reduction to perform over attr.dims.
case OperationType::REDUCE_MAXIMUM:
case OperationType::REDUCE_MINIMUM:
case OperationType::REDUCE_PRODUCT:
case OperationType::REDUCE_SUM: {
auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
gpu_operation->operation = SelectReduce(
attr.dims, inputs[0]->tensor.shape, op_type, op_def, gpu_info);
return absl::OkStatus();
}
case OperationType::RELU: {
auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
gpu_operation->operation =
absl::make_unique<GPUOperation>(CreateReLU(op_def, attr));
return absl::OkStatus();
}
case OperationType::QUANTIZE_AND_DEQUANTIZE: {
auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
node.operation.attributes);
gpu_operation->operation = absl::make_unique<GPUOperation>(
CreateQuantizeAndDequantize(op_def, attr));
return absl::OkStatus();
}
// RESHAPE: the implementation depends on the source channel count (first
// input) and the destination channel count (attr.new_shape.c).
case OperationType::RESHAPE: {
const int src_channels = inputs[0]->tensor.shape.c;
auto attr = absl::any_cast<ReshapeAttributes>(node.operation.attributes);
SelectReshape(src_channels, attr.new_shape.c, op_def,
&gpu_operation->operation);
return absl::OkStatus();
}
case OperationType::RESIZE: {
auto attr = absl::any_cast<Resize2DAttributes>(node.operation.attributes);
return SelectResize(attr, op_def, &gpu_operation->operation);
}
case OperationType::SLICE: {
auto attr = absl::any_cast<SliceAttributes>(node.operation.attributes);
SelectStridedSlice(attr, op_def, &gpu_operation->operation);
return absl::OkStatus();
}
// SOFTMAX: only the channels axis is supported; the implementation is
// chosen from the shape of the first input.
case OperationType::SOFTMAX: {
auto attr = absl::any_cast<SoftmaxAttributes>(node.operation.attributes);
if (attr.axis != Axis::CHANNELS) {
return absl::UnimplementedError(
"Softmax supports only CHANNELS dimension");
}
const auto src_shape = inputs[0]->tensor.shape;
SelectSoftmax(src_shape, op_def, &gpu_operation->operation);
return absl::OkStatus();
}
case OperationType::SPACE_TO_DEPTH: {
auto attr =
absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
SelectSpaceToDepth(attr, op_def, &gpu_operation->operation);
return absl::OkStatus();
}
case OperationType::TRANSPOSE: {
auto attr =
absl::any_cast<TransposeAttributes>(node.operation.attributes);
SelectTranspose(attr, op_def, &gpu_operation->operation);
return absl::OkStatus();
}
// Elementwise ops that take a single input and no attributes.
case OperationType::ABS:
case OperationType::COPY:
case OperationType::COS:
case OperationType::ELU:
case OperationType::EXP:
case OperationType::HARD_SWISH:
case OperationType::LOG:
case OperationType::NEG:
case OperationType::RSQRT:
case OperationType::SIGMOID:
case OperationType::SIN:
case OperationType::SQRT:
case OperationType::SQUARE:
case OperationType::TANH: {
GPUOperation operation =
CreateElementwiseOneInput(gpu_info, op_def, op_type);
gpu_operation->operation =
absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
}
// Binary elementwise ops: either two runtime inputs, or one runtime input
// combined with ElementwiseAttributes stored in the node's attributes.
// Anything else is rejected.
case OperationType::DIV:
case OperationType::EQUAL:
case OperationType::GREATER:
case OperationType::GREATER_EQUAL:
case OperationType::LESS:
case OperationType::LESS_EQUAL:
case OperationType::MAXIMUM:
case OperationType::MINIMUM:
case OperationType::MUL:
case OperationType::NOT_EQUAL:
case OperationType::POW:
case OperationType::SQUARED_DIFF:
case OperationType::SUB: {
if (inputs.size() == 2) {
GPUOperation operation =
CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
gpu_operation->operation =
absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
} else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
auto attr =
absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
GPUOperation operation =
CreateElementwise(gpu_info, op_def, op_type, attr);
gpu_operation->operation =
absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
}
return absl::UnimplementedError(absl::StrCat(
"No support of ", node.operation.type, " with this parameters"));
}
// Op types that are explicitly rejected here.
case OperationType::BATCH_NORMALIZATION:
case OperationType::BATCH_TO_SPACE:
case OperationType::BATCHED_MATMUL:
case OperationType::CONSTANT:
// TODO(b/162763635): implement MeanStddevNormalization for Metal.
case OperationType::MEAN_STDDEV_NORMALIZATION:
case OperationType::SPACE_TO_BATCH:
return absl::UnimplementedError("Unsupported op: " + node.operation.type);
// Every op type not listed above is handed to SelectDefault.
default:
return SelectDefault(gpu_info, op_def, inputs, outputs, node,
gpu_subgraph);
}
// Reached only by the cases that `break` (generic convolution, transposed
// convolution, depthwise convolution, fully connected).
return absl::OkStatus();
}
} // namespace metal
} // namespace gpu
} // namespace tflite