Metal GPU delegate: RuntimeOptions replaced with CalculationsPrecision.
PiperOrigin-RevId: 346263927
Change-Id: I73b3035df2490633c526a865069c09209de214ed
diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD
index 069230e..2c0080c 100644
--- a/tensorflow/lite/delegates/gpu/BUILD
+++ b/tensorflow/lite/delegates/gpu/BUILD
@@ -92,6 +92,7 @@
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:model_builder",
"//tensorflow/lite/delegates/gpu/common:model_transformer",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:quantization_util",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD
index 6dcde34..609bf4f 100644
--- a/tensorflow/lite/delegates/gpu/metal/BUILD
+++ b/tensorflow/lite/delegates/gpu/metal/BUILD
@@ -26,10 +26,10 @@
deps = [
":compiled_model",
":compute_task_descriptor",
- ":runtime_options",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:operations",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:util",
@@ -163,8 +163,8 @@
":common",
":compute_task_descriptor",
":metal_arguments",
- ":runtime_options",
"//tensorflow/lite/delegates/gpu/common:model",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:types",
@@ -211,9 +211,9 @@
":compiled_model",
":compute_task",
":compute_task_descriptor",
- ":runtime_options",
"//tensorflow/lite/delegates/gpu/common:memory_management",
"//tensorflow/lite/delegates/gpu/common:model",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:util",
@@ -292,11 +292,6 @@
],
)
-cc_library(
- name = "runtime_options",
- hdrs = ["runtime_options.h"],
-)
-
objc_library(
name = "TestBinary",
testonly = 1,
@@ -342,7 +337,6 @@
"//tensorflow/lite/delegates/gpu/metal:common",
"//tensorflow/lite/delegates/gpu/metal:inference_context",
"//tensorflow/lite/delegates/gpu/metal:metal_spatial_tensor",
- "//tensorflow/lite/delegates/gpu/metal:runtime_options",
"//tensorflow/lite/delegates/gpu/metal/kernels:test_util",
"@com_google_absl//absl/memory",
],
diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc
index b04632b..ff8bedf 100644
--- a/tensorflow/lite/delegates/gpu/metal/api.cc
+++ b/tensorflow/lite/delegates/gpu/metal/api.cc
@@ -48,7 +48,6 @@
#include "tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
namespace tflite {
namespace gpu {
@@ -183,7 +182,7 @@
const std::vector<ValueId>& inputs,
const std::vector<ValueId>& outputs,
const GpuInfo& gpu_info,
- const RuntimeOptions& options,
+ CalculationsPrecision precision,
int* last_value_id,
std::map<ValueId, BHWC>* tensor_shapes,
std::vector<NodeDescriptor>* nodes) {
@@ -199,15 +198,7 @@
node_desc.src_tensors_ids = inputs;
node_desc.dst_tensors_ids = outputs;
OperationDef op_def;
- if (options.storage_precision == RuntimeOptions::Precision::FP32) {
- op_def.precision = CalculationsPrecision::F32;
- } else {
- if (options.accumulator_precision == RuntimeOptions::Precision::FP32) {
- op_def.precision = CalculationsPrecision::F32_F16;
- } else {
- op_def.precision = CalculationsPrecision::F16;
- }
- }
+ op_def.precision = precision;
DataType data_type = DeduceDataTypeFromPrecision(op_def.precision);
TensorDescriptor tensor_descriptor =
TensorDescriptor{data_type, TensorStorageType::BUFFER, Layout::HWC};
@@ -536,7 +527,7 @@
} // namespace
absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
- const RuntimeOptions& options,
+ CalculationsPrecision precision,
CompiledModel* compiled_model) {
int last_value_id = 0;
for (const auto& value : graph.values()) {
@@ -555,11 +546,11 @@
}
std::vector<NodeDescriptor> node_descs;
std::vector<ComputeTaskDescriptorPtr> custom_tasks;
- auto custom_status =
- RegisterCustomOps(graph, node, inputs, outputs, options, &custom_tasks);
+ auto custom_status = RegisterCustomOps(graph, node, inputs, outputs,
+ precision, &custom_tasks);
if (!custom_status.ok()) {
auto primary_status = RegisterPrimaryOps(
- graph, node, inputs, outputs, gpu_info, options, &last_value_id,
+ graph, node, inputs, outputs, gpu_info, precision, &last_value_id,
&compiled_model->tensor_shapes, &node_descs);
if (!primary_status.ok()) {
return absl::UnimplementedError(
diff --git a/tensorflow/lite/delegates/gpu/metal/api.h b/tensorflow/lite/delegates/gpu/metal/api.h
index f7cdfa4..a2ef5c2 100644
--- a/tensorflow/lite/delegates/gpu/metal/api.h
+++ b/tensorflow/lite/delegates/gpu/metal/api.h
@@ -18,9 +18,9 @@
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
namespace tflite {
namespace gpu {
@@ -28,7 +28,7 @@
// Builds CompiledModel out of GraphFloat32 graph using provided RuntimeOptions.
absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
- const RuntimeOptions& options,
+ CalculationsPrecision precision,
CompiledModel* compiled_model);
} // namespace metal
diff --git a/tensorflow/lite/delegates/gpu/metal/compute_task.h b/tensorflow/lite/delegates/gpu/metal/compute_task.h
index b3c32f4..73e9d81 100644
--- a/tensorflow/lite/delegates/gpu/metal/compute_task.h
+++ b/tensorflow/lite/delegates/gpu/metal/compute_task.h
@@ -24,17 +24,17 @@
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
@interface TFLComputeTask : NSObject
/// Returns empty string or error if shader can't be compiled.
- (absl::Status)compileWithDevice:(id<MTLDevice>)device
taskDescriptor:(const tflite::gpu::metal::NodeDescriptor&)desc
- runtimeOptions:(const ::tflite::gpu::metal::RuntimeOptions&)options;
+ precision:(tflite::gpu::CalculationsPrecision)precision;
/// Updates parameters for inputs/outputs/intermediate tensors
- (absl::Status)updateParamsWithDevice:(id<MTLDevice>)device
diff --git a/tensorflow/lite/delegates/gpu/metal/compute_task.mm b/tensorflow/lite/delegates/gpu/metal/compute_task.mm
index 62a6a61..388ca95 100644
--- a/tensorflow/lite/delegates/gpu/metal/compute_task.mm
+++ b/tensorflow/lite/delegates/gpu/metal/compute_task.mm
@@ -26,7 +26,6 @@
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/common.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::AlignByN;
using ::tflite::gpu::BHWC;
@@ -34,7 +33,7 @@
using ::tflite::gpu::metal::ComputeTaskDescriptorPtr;
using ::tflite::gpu::metal::CreateComputeProgram;
using ::tflite::gpu::metal::DispatchParamsFunction;
-using ::tflite::gpu::metal::RuntimeOptions;
+using ::tflite::gpu::CalculationsPrecision;
using ::tflite::gpu::metal::UniformsFunction;
using ::tflite::gpu::uint3;
using ::tflite::gpu::ValueId;
@@ -73,7 +72,7 @@
- (absl::Status)compileWithDevice:(id<MTLDevice>)device
taskDescriptor:(const tflite::gpu::metal::NodeDescriptor&)desc
- runtimeOptions:(const RuntimeOptions&)options {
+ precision:(CalculationsPrecision)precision; {
size_t offset = desc.task->src_tensors_names.size() + desc.task->uniform_buffers.size()
+ desc.task->immutable_buffers.size() + 1;
RETURN_IF_ERROR(_metal_args.Init(device, offset, &desc.task->args, &desc.task->shader_source));
@@ -90,13 +89,13 @@
NSString* toAccumulatorType2 = @"";
NSString* toAccumulatorType3 = @"";
NSString* toAccumulatorType4 = @"";
- if (options.storage_precision == RuntimeOptions::Precision::FP32) {
+ if (precision == CalculationsPrecision::F32) {
storageType = @"float";
accumulatorType = @"float";
} else {
// FP16
storageType = @"half";
- if (options.accumulator_precision == RuntimeOptions::Precision::FP32) {
+ if (precision == CalculationsPrecision::F32_F16) {
accumulatorType = @"float";
toAccumulatorType = @"float";
toAccumulatorType2 = @"float2";
@@ -136,10 +135,9 @@
_uniformBuffers.emplace_back(UniformBuffer{{}, uniform.data_function});
}
_outputBuffers.emplace_back(OutputBuffer{desc.dst_tensors_ids[0], nil});
+ const bool f32_storage = precision == CalculationsPrecision::F32;
for (auto& immutable : desc.task->immutable_buffers) {
- int padding =
- 4 * (options.storage_precision == RuntimeOptions::Precision::FP32 ? sizeof(float)
- : sizeof(HalfBits));
+ int padding = 4 * (f32_storage ? sizeof(float) : sizeof(HalfBits));
int paddedSize = AlignByN(immutable.data.size(), padding);
immutable.data.resize(paddedSize);
id<MTLBuffer> metalBuffer = [device newBufferWithBytes:immutable.data.data()
diff --git a/tensorflow/lite/delegates/gpu/metal/inference_context.h b/tensorflow/lite/delegates/gpu/metal/inference_context.h
index c215a91..f5d03fb 100644
--- a/tensorflow/lite/delegates/gpu/metal/inference_context.h
+++ b/tensorflow/lite/delegates/gpu/metal/inference_context.h
@@ -23,11 +23,11 @@
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
/// Stages of model preprocessing:
/// 1. Operations' initialization. All operations are initialized and added into
@@ -56,7 +56,7 @@
model:(const tflite::gpu::metal::CompiledModel&)compiledModel
inputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)inputBufferIDs
outputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)outputBufferIDs
- runtimeOptions:(const tflite::gpu::metal::RuntimeOptions&)options;
+ precision:(tflite::gpu::CalculationsPrecision)precision;
/// Inserts all GPU compute tasks into the command encoder.
/// @param inputOutputBuffers Must be created and passed into the method with pairs ID:buffer
diff --git a/tensorflow/lite/delegates/gpu/metal/inference_context.mm b/tensorflow/lite/delegates/gpu/metal/inference_context.mm
index 84322a4..5512dd3 100644
--- a/tensorflow/lite/delegates/gpu/metal/inference_context.mm
+++ b/tensorflow/lite/delegates/gpu/metal/inference_context.mm
@@ -22,16 +22,16 @@
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::metal::ComputeTaskDescriptorPtr;
-using ::tflite::gpu::metal::RuntimeOptions;
+using ::tflite::gpu::CalculationsPrecision;
using ::tflite::gpu::ValueId;
using ::tflite::gpu::AlignByN;
using ::tflite::gpu::HalfBits;
@@ -45,7 +45,7 @@
std::vector<ValueId> _inputIds;
std::vector<ValueId> _outputIds;
id<MTLDevice> _device;
- RuntimeOptions _options;
+ CalculationsPrecision _precision;
std::map<ValueId, BHWC> _tensorShapes;
}
@@ -53,17 +53,17 @@
model:(const tflite::gpu::metal::CompiledModel&) compiledModel
inputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)inputBufferIDs
outputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)outputBufferIDs
- runtimeOptions:(const RuntimeOptions&)options {
+ precision:(tflite::gpu::CalculationsPrecision)precision {
_device = device;
_inputIds = inputBufferIDs;
_outputIds = outputBufferIDs;
- _options = options;
+ _precision = precision;
// Metal resources are created here.
for (const auto& node : compiledModel.nodes) {
TFLComputeTask* task = [[TFLComputeTask alloc] init];
RETURN_IF_ERROR([task compileWithDevice:_device
taskDescriptor:node
- runtimeOptions:_options]);
+ precision:_precision]);
[task setDescription:node.description];
_computeTasks.emplace_back(task);
}
@@ -119,9 +119,8 @@
RETURN_IF_ERROR(AssignObjectsToTensors(usageRecords, MemoryStrategy::GREEDY_BEST, &assignment));
auto objectsCount = assignment.object_sizes.size();
std::vector<id<MTLBuffer>> sharedBuffers(objectsCount);
- size_t dataTypeSize = _options.storage_precision == RuntimeOptions::Precision::FP32
- ? sizeof(float)
- : sizeof(HalfBits);
+ const bool f32_storage = _precision == CalculationsPrecision::F32;
+ size_t dataTypeSize = f32_storage ? sizeof(float) : sizeof(HalfBits);
// allocate buffers for each shared object
for (size_t i = 0; i < objectsCount; ++i) {
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
index 4dd0ed1..20c59e2 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
@@ -174,9 +174,9 @@
hdrs = ["custom_registry.h"],
deps = [
"//tensorflow/lite/delegates/gpu/common:model",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
- "//tensorflow/lite/delegates/gpu/metal:runtime_options",
],
)
@@ -814,6 +814,7 @@
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:operations",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:tensor",
@@ -823,7 +824,6 @@
"//tensorflow/lite/delegates/gpu/metal:common",
"//tensorflow/lite/delegates/gpu/metal:compiled_model",
"//tensorflow/lite/delegates/gpu/metal:inference_context",
- "//tensorflow/lite/delegates/gpu/metal:runtime_options",
"@FP16",
"@com_google_absl//absl/memory",
],
@@ -897,12 +897,12 @@
deps = [
":test_util",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
+ "//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common:util",
"//tensorflow/lite/delegates/gpu/metal:common",
"//tensorflow/lite/delegates/gpu/metal:inference_context",
- "//tensorflow/lite/delegates/gpu/metal:runtime_options",
],
)
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm
index 22a798c..3facbc4 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::ElementwiseAttributes;
using ::tflite::gpu::BHWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/concat_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/concat_test.mm
index 195a298..6ac084c 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/concat_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/concat_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::Axis;
using ::tflite::gpu::BHWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm
index 71ea6f9..6775fd3 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv_test.mm
@@ -28,7 +28,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::Axis;
using ::tflite::gpu::BHWC;
@@ -286,9 +285,6 @@
}
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
- tflite::gpu::metal::RuntimeOptions options;
- options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
- options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
std::map<ValueId, TensorFloat32> inputs_v0;
inputs_v0[0] = src_tensor;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc b/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc
index 620a458..fa97160 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc
@@ -18,9 +18,9 @@
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
namespace tflite {
namespace gpu {
@@ -29,7 +29,7 @@
absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
const std::vector<ValueId>& inputs,
const std::vector<ValueId>& outputs,
- const RuntimeOptions& options,
+ CalculationsPrecision precision,
std::vector<ComputeTaskDescriptorPtr>* tasks) {
return absl::UnimplementedError("Unsupported op: " + node->operation.type);
}
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h b/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h
index eee1632..2f08b74 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h
@@ -19,9 +19,9 @@
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
namespace tflite {
namespace gpu {
@@ -31,7 +31,7 @@
absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
const std::vector<ValueId>& inputs,
const std::vector<ValueId>& outputs,
- const RuntimeOptions& options,
+ CalculationsPrecision precision,
std::vector<ComputeTaskDescriptorPtr>* tasks);
} // namespace metal
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv_test.mm
index dcf550f..817a371 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::Axis;
using ::tflite::gpu::BHWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm
index 867ed59..5826e2b 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::DataType;
using ::tflite::gpu::HWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected_test.mm
index e57f9aa..b6e4cb9 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling_test.mm
index cf4aacf..5ee3603 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/mean_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/mean_test.mm
index 67325c1..e4fa301 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/mean_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/mean_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::Axis;
using ::tflite::gpu::BHWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm
index 9c55cfc..e8c0ef6 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/pooling_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/pooling_test.mm
index d2d95b3..a28dd64 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/pooling_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/pooling_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/prelu_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/prelu_test.mm
index 1df08be..3a01ca2 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/prelu_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/prelu_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize_test.mm
index 7a16f1d..7eb71bf 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize_test.mm
@@ -25,7 +25,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
using ::tflite::NudgeQuantizationRange;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/relu_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/relu_test.mm
index 52de77e..d685a8c 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/relu_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/relu_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/reshape_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/reshape_test.mm
index 684e83b..9a64ef5 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/reshape_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/reshape_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/resize_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/resize_test.mm
index 082f2c8..f087777 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/resize_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/resize_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/slice_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/slice_test.mm
index e0c2956..25b45d4 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/slice_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/slice_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/softmax_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/softmax_test.mm
index 9196e9f..c5b2fd0 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/softmax_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/softmax_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::Axis;
using ::tflite::gpu::BHWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth_test.mm
index 17e3988..b7c474e 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::DataType;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.h b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.h
index 14b64d3..bf8cbc3 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.h
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.h
@@ -26,7 +26,6 @@
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
namespace tflite {
namespace gpu {
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm
index 912910c..75a5522 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/test_util.mm
@@ -33,7 +33,7 @@
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
namespace tflite {
@@ -84,11 +84,9 @@
std::string device_name = std::string([[device name] UTF8String]);
GpuInfo gpu_info;
GetGpuInfoFromDeviceDescription(device_name, GpuApi::kMetal, &gpu_info);
- RuntimeOptions options;
- options.storage_precision = RuntimeOptions::Precision::FP32;
- options.accumulator_precision = RuntimeOptions::Precision::FP32;
+ CalculationsPrecision precision = CalculationsPrecision::F32;
CompiledModel compiled_model;
- RETURN_IF_ERROR(Compile(graph_, gpu_info, options, &compiled_model));
+ RETURN_IF_ERROR(Compile(graph_, gpu_info, precision, &compiled_model));
CompiledModel optimized_model;
RETURN_IF_ERROR(ValidateOptimizeModel(input_ids, output_ids, compiled_model, &optimized_model));
@@ -97,7 +95,7 @@
model:optimized_model
inputBufferIDs:input_ids
outputBufferIDs:output_ids
- runtimeOptions:options]);
+ precision:precision]);
std::map<ValueId, BHWC> input_dimensions;
std::map<ValueId, id<MTLBuffer>> input_buffers;
for (auto& input : inputs_) {
@@ -193,16 +191,14 @@
RETURN_IF_ERROR(
ValidateOptimizeModel(inputBufferIDs, outputBufferIDs, raw_model, &optimized_model));
- RuntimeOptions options;
- options.storage_precision = RuntimeOptions::Precision::FP32;
- options.accumulator_precision = RuntimeOptions::Precision::FP32;
+ CalculationsPrecision precision = CalculationsPrecision::F32;
TFLInferenceContext* graph = [[TFLInferenceContext alloc] init];
RETURN_IF_ERROR([graph compileModelWithDevice:device
model:optimized_model
inputBufferIDs:inputBufferIDs
outputBufferIDs:outputBufferIDs
- runtimeOptions:options]);
+ precision:precision]);
std::map<ValueId, BHWC> inputDimensions;
std::map<ValueId, std::vector<float>> inputBuffersCPU;
std::map<ValueId, id<MTLBuffer>> inputBuffersGPU;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv_test.mm
index 3d716ec..dd5f412 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv_test.mm
@@ -27,7 +27,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::ConvolutionTransposedAttributes;
using ::tflite::gpu::BHWC;
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/winograd_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/winograd_test.mm
index 90d6c2e..7f138a3 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/winograd_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/winograd_test.mm
@@ -26,7 +26,6 @@
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
using ::tflite::gpu::BHWC;
@@ -151,10 +150,6 @@
}
}
- tflite::gpu::metal::RuntimeOptions options;
- options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
- options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
-
tflite::gpu::metal::Winograd4x4To36Attributes attr;
attr.padding.prepended = tflite::gpu::HW(1, 1);
attr.padding.appended = tflite::gpu::HW(1, 1);
@@ -229,10 +224,6 @@
attr.biases.shape = tflite::gpu::Linear(1);
attr.biases.data.resize(1, 0.0f);
- tflite::gpu::metal::RuntimeOptions options;
- options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
- options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
-
tflite::gpu::OperationDef op_def;
op_def.precision = tflite::gpu::CalculationsPrecision::F32;
tflite::gpu::TensorDescriptor tensor_descriptor = tflite::gpu::TensorDescriptor{
@@ -304,10 +295,6 @@
attr.biases.shape = tflite::gpu::Linear(1);
attr.biases.data.resize(1, 0.0f);
- tflite::gpu::metal::RuntimeOptions options;
- options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
- options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
-
tflite::gpu::OperationDef op_def;
op_def.precision = tflite::gpu::CalculationsPrecision::F32;
tflite::gpu::TensorDescriptor tensor_descriptor = tflite::gpu::TensorDescriptor{
diff --git a/tensorflow/lite/delegates/gpu/metal/runtime_options.h b/tensorflow/lite/delegates/gpu/metal/runtime_options.h
deleted file mode 100644
index d8e8fe3..0000000
--- a/tensorflow/lite/delegates/gpu/metal/runtime_options.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_RUNTIME_OPTIONS_H_
-#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_RUNTIME_OPTIONS_H_
-
-namespace tflite {
-namespace gpu {
-namespace metal {
-
-struct RuntimeOptions {
- enum class Precision {
- FP16,
- FP32,
- };
- // Buffer storage format. If FP32 then accumulator must be FP32.
- Precision storage_precision = Precision::FP32;
- // Accumulator precision. Defines the precision for convolutions.
- Precision accumulator_precision = Precision::FP32;
-};
-
-} // namespace metal
-} // namespace gpu
-} // namespace tflite
-
-#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_RUNTIME_OPTIONS_H_
diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.mm b/tensorflow/lite/delegates/gpu/metal_delegate.mm
index 229f898..27d3f45 100644
--- a/tensorflow/lite/delegates/gpu/metal_delegate.mm
+++ b/tensorflow/lite/delegates/gpu/metal_delegate.mm
@@ -45,10 +45,11 @@
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/minimal_logging.h"
+
namespace tflite {
namespace gpu {
namespace metal {
@@ -338,19 +339,17 @@
GpuInfo gpu_info;
GetGpuInfoFromDeviceDescription(device_name, GpuApi::kMetal, &gpu_info);
size_t storage_type_size;
- RuntimeOptions runtime_options;
+ CalculationsPrecision precision;
if (options_.allow_precision_loss) {
storage_type_size = sizeof(HalfBits);
- runtime_options.storage_precision = RuntimeOptions::Precision::FP16;
if (gpu_info.IsRoundToNearestSupported()) {
- runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16;
+ precision = CalculationsPrecision::F16;
} else {
- runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32;
+ precision = CalculationsPrecision::F32_F16;
}
} else {
storage_type_size = sizeof(float);
- runtime_options.storage_precision = RuntimeOptions::Precision::FP32;
- runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32;
+ precision = CalculationsPrecision::F32;
}
// TODO(impjdi): Merge logic with above.
@@ -435,7 +434,7 @@
// TODO(impjdi): Merge these.
CompiledModel compiled_model;
- RETURN_IF_ERROR(Compile(graph, gpu_info, runtime_options, &compiled_model));
+ RETURN_IF_ERROR(Compile(graph, gpu_info, precision, &compiled_model));
CompiledModel optimized_model;
RETURN_IF_ERROR(ValidateOptimizeModel(input_ids, output_ids, compiled_model, &optimized_model));
@@ -444,7 +443,7 @@
model:optimized_model
inputBufferIDs:input_ids
outputBufferIDs:output_ids
- runtimeOptions:runtime_options]);
+ precision:precision]);
return absl::OkStatus();
}