/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/operator_property.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"
namespace tflite {
namespace optimize {
namespace operator_property {
namespace {
OpVariant GetOperatorVariant(const ModelT* model, int subgraph_index,
int op_index) {
OpVariant op_variant;
OperatorT* op =
model->subgraphs.at(subgraph_index)->operators[op_index].get();
op_variant.op_code =
GetBuiltinCode(model->operator_codes[op->opcode_index].get());
if (op_variant.op_code == BuiltinOperator_LSTM ||
op_variant.op_code == BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM) {
if (op->inputs.size() == 5) {
// The 5-input ("basic") LSTM is not supported by this tooling (yet).
op_variant.is_quantizable = false;
return op_variant;
}
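// The indices below follow the LSTM op's input tensor layout in the
// TFLite schema: the variant without layer normalization has 20 inputs,
// and the layer-norm variant appends the four coefficient tensors at
// indices 20-23.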
const int cell_to_output_weight_index = 11;
const int forget_layer_norm_coefficients_index = 21;
const int projection_weights_index = 16;
op_variant.use_projection = op->inputs[projection_weights_index] != -1;
op_variant.use_peephole = op->inputs[cell_to_output_weight_index] != -1;
if (op->inputs.size() == 20) {
op_variant.use_layer_norm = false;
} else {
op_variant.use_layer_norm =
op->inputs[forget_layer_norm_coefficients_index] != -1;
}
}
return op_variant;
}
} // namespace
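// Example usage (a sketch; assumes `model` points to a loaded ModelT):
//
//   OperatorProperty property =
//       GetOperatorProperty(model, /*subgraph_index=*/0, /*op_index=*/0);
//   if (property.quantizable) {
//     // Quantize the tensors listed in property.inputs and
//     // property.outputs.
//   }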
OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
int op_index) {
OpVariant op_variant = GetOperatorVariant(model, subgraph_index, op_index);
return GetOperatorProperty(op_variant);
}
// Update the operation definitions in the TensorFlow Lite dialect
// accordingly whenever the kernel support level changes.
// LINT.IfChange
OperatorProperty GetOperatorProperty(OpVariant op_variant) {
BuiltinOperator op_code = op_variant.op_code;
OperatorProperty property;
switch (op_code) {
case BuiltinOperator_ABS:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_RSQRT:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_ADD:
property.inputs = {{0, {}}, {1, {}}};
property.outputs = {{0, {}}};
property.version = 2;
property.quantize_input_as_activations = true;
break;
case BuiltinOperator_ARG_MAX:
property.inputs = {{0, {}}};
// ArgMax has no quantizable output.
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_AVERAGE_POOL_2D:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_BATCH_MATMUL: {
property.inputs = {{0, {}}, {1, {}}};
property.outputs = {{0, {}}};
property.version = 2;
property.quantize_input_as_activations = true;
break;
}
case BuiltinOperator_BATCH_TO_SPACE_ND:
case BuiltinOperator_SPACE_TO_BATCH_ND:
case BuiltinOperator_SPACE_TO_DEPTH:
// We skip inputs 1 and 2 since they aren't real valued (they are shapes).
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_BROADCAST_TO:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 3;
break;
case BuiltinOperator_DEPTH_TO_SPACE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_SPLIT:
// We skip input 0 since it is the split dim which is not real valued.
property.inputs = {{1, {}}};
property.arbitrary_outputs = true;
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_SPLIT_V:
property.inputs = {{0, {}}};
property.arbitrary_outputs = true;
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_CONCATENATION:
property.arbitrary_inputs = true;
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_CONV_2D: {
TensorProperty tensor_property;
tensor_property.per_axis = true;
tensor_property.per_axis_index = 0;
tensor_property.symmetric = true;
property.inputs = {{0, {}}, {1, tensor_property}};
property.outputs = {{0, {}}};
property.biases = {2};
property.version = 3;
break;
}
case BuiltinOperator_TRANSPOSE_CONV: {
TensorProperty tensor_property;
tensor_property.per_axis = true;
tensor_property.per_axis_index = 0;
tensor_property.symmetric = true;
property.inputs = {{2, {}}, {1, tensor_property}};
property.outputs = {{0, {}}};
property.biases = {3};
property.version = 3;
break;
}
case BuiltinOperator_DEPTHWISE_CONV_2D: {
TensorProperty tensor_property;
tensor_property.per_axis = true;
tensor_property.per_axis_index = 3;
tensor_property.symmetric = true;
property.inputs = {
{0, {}},
{1, tensor_property},
};
property.outputs = {{0, {}}};
property.biases = {2};
property.version = 3;
break;
}
case BuiltinOperator_EQUAL:
case BuiltinOperator_NOT_EQUAL:
case BuiltinOperator_GREATER:
case BuiltinOperator_GREATER_EQUAL:
case BuiltinOperator_LESS:
case BuiltinOperator_LESS_EQUAL:
property.inputs = {{0, {}}, {1, {}}};
// Comparisons have no quantizable outputs.
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_EXPAND_DIMS:
// We skip input 1 as it is not real valued (it is the axis index) and
// hence does not need to be quantized.
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 1;
break;
case BuiltinOperator_FILL: {
property.inputs = {{1, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 3;
break;
}
case BuiltinOperator_FULLY_CONNECTED: {
TensorProperty tensor_property;
tensor_property.symmetric = true;
property.inputs = {{0, {}}, {1, tensor_property}};
property.outputs = {{0, {}}};
property.biases = {2};
property.version = 4;
break;
}
case BuiltinOperator_GATHER:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.quantize_input_as_activations = true;
property.version = 2;
break;
case BuiltinOperator_GATHER_ND:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 3;
break;
case BuiltinOperator_HARD_SWISH: {
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 1;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_LOG_SOFTMAX: {
property.inputs = {{0, {}}};
// LogSoftmax requires output with 16/256 as scale and 127 as zero point.
TensorProperty tensor_property;
tensor_property.restriction = true;
tensor_property.restricted_value_int8 = {16.0f / 256.0f, 127};
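// With this scale and zero point the int8 grid spans [-255/16, 0],
// matching log-softmax's non-positive output range.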
property.outputs = {{0, tensor_property}};
property.version = 2;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_LOGISTIC: {
property.inputs = {{0, {}}};
// Logistic requires output with 1/256 as scale and -128 as zero point.
TensorProperty tensor_property;
tensor_property.restriction = true;
tensor_property.restricted_value_int8 = {1 / 256.0f, -128};
tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
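// These fixed parameters make the int8 grid span [0, 255/256] and the
// int16 grid span [-1, 32767/32768], covering the sigmoid's (0, 1)
// output range.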
property.outputs = {{0, tensor_property}};
property.version = 2;
break;
}
case BuiltinOperator_LSTM:
case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
if (!op_variant.is_quantizable) {
// Early exit for the 5-input LSTM.
// It is not supported by this tooling yet.
property.quantizable = false;
break;
}
// LSTM needs 5 intermediate tensors. This agrees with the fully
// quantized kernels in lstm_eval.cc.
if (op_variant.use_layer_norm && op_variant.use_projection &&
op_variant.use_peephole) {
static const float alpha = static_cast<float>(std::pow(2, -10));
TensorProperty tensor_property_9;
tensor_property_9.number_of_bits = 16;
tensor_property_9.symmetric = true;
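// derived_scale is {{input tensor indices}, {intermediate tensor
// indices}, {constant factors}}: the tensor's scale is the product of
// the scales of the listed tensors and the factors (see DerivedScale in
// operator_property.h). E.g. {{20}, {}, {alpha}} below pins the input
// gate bias scale to scale(input 20) * 2^-10.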
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{20}, {}, {alpha}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{21}, {}, {alpha}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{22}, {}, {alpha}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{23}, {}, {alpha}};
TensorProperty tensor_property_17;
tensor_property_17.use_derived_scale = true;
tensor_property_17.number_of_bits = 32;
tensor_property_17.derived_scale = {{16}, {4}, {}};
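// Input 19 is the cell state. Its scale is extended to a power of two
// so the integer kernel can rescale the cell state with simple bit
// shifts (see lstm_eval.cc).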
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
TensorProperty tensor_property_20;
tensor_property_20.number_of_bits = 16;
tensor_property_20.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{9, tensor_property_9},
{10, tensor_property_9},
{11, tensor_property_9},
{16, {}},
{19, tensor_property_19},
{20, tensor_property_20},
{21, tensor_property_20},
{22, tensor_property_20},
{23, tensor_property_20},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
{17, tensor_property_17},
};
property.outputs = {{0, {}}};
property.intermediates = {
{0, tensor_property_20},
{1, tensor_property_20},
{2, tensor_property_20},
{3, tensor_property_20},
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (op_variant.use_layer_norm && op_variant.use_projection &&
!op_variant.use_peephole) {
static const float alpha = static_cast<float>(std::pow(2, -10));
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{20}, {}, {alpha}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{21}, {}, {alpha}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{22}, {}, {alpha}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{23}, {}, {alpha}};
TensorProperty tensor_property_17;
tensor_property_17.use_derived_scale = true;
tensor_property_17.number_of_bits = 32;
tensor_property_17.derived_scale = {{16}, {4}, {}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
TensorProperty tensor_property_20;
tensor_property_20.number_of_bits = 16;
tensor_property_20.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{16, {}},
{19, tensor_property_19},
{20, tensor_property_20},
{21, tensor_property_20},
{22, tensor_property_20},
{23, tensor_property_20},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
{17, tensor_property_17},
};
property.outputs = {{0, {}}};
property.intermediates = {
{0, tensor_property_20},
{1, tensor_property_20},
{2, tensor_property_20},
{3, tensor_property_20},
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (op_variant.use_layer_norm && !op_variant.use_projection &&
op_variant.use_peephole) {
static const float alpha = static_cast<float>(std::pow(2, -10));
TensorProperty tensor_property_9;
tensor_property_9.number_of_bits = 16;
tensor_property_9.symmetric = true;
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{20}, {}, {alpha}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{21}, {}, {alpha}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{22}, {}, {alpha}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{23}, {}, {alpha}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
TensorProperty tensor_property_20;
tensor_property_20.number_of_bits = 16;
tensor_property_20.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{9, tensor_property_9},
{10, tensor_property_9},
{11, tensor_property_9},
{19, tensor_property_19},
{20, tensor_property_20},
{21, tensor_property_20},
{22, tensor_property_20},
{23, tensor_property_20},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
};
property.outputs = {{0, {}}};
property.intermediates = {
{0, tensor_property_20},
{1, tensor_property_20},
{2, tensor_property_20},
{3, tensor_property_20},
// Without projection, the hidden state (intermediate 4), the output
// (0), and the input activation state (input 18) are the same, except
// that the very first input activation is not captured in the hidden
// state or the output.
// This is not an issue because this intermediate tensor is not used by
// the kernel and its quantization parameters are ignored.
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (op_variant.use_layer_norm && !op_variant.use_projection &&
!op_variant.use_peephole) {
static const float alpha = static_cast<float>(std::pow(2, -10));
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{20}, {}, {alpha}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{21}, {}, {alpha}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{22}, {}, {alpha}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{23}, {}, {alpha}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
TensorProperty tensor_property_20;
tensor_property_20.number_of_bits = 16;
tensor_property_20.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{19, tensor_property_19},
{20, tensor_property_20},
{21, tensor_property_20},
{22, tensor_property_20},
{23, tensor_property_20},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
};
property.outputs = {{0, {}}};
property.intermediates = {
{0, tensor_property_20},
{1, tensor_property_20},
{2, tensor_property_20},
{3, tensor_property_20},
// Without projection, the hidden state (intermediate 4), the output
// (0), and the input activation state (input 18) are the same, except
// that the very first input activation is not captured in the hidden
// state or the output.
// This is not an issue because this intermediate tensor is not used by
// the kernel and its quantization parameters are ignored.
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (!op_variant.use_layer_norm && op_variant.use_projection &&
op_variant.use_peephole) {
TensorProperty tensor_property_9;
tensor_property_9.number_of_bits = 16;
tensor_property_9.symmetric = true;
// Without layer norm, we choose to quantize each gate bias with the
// scale of the input and its corresponding weights. The alternative
// would be the scale of the recurrent state and its corresponding
// weights; we use the smaller scale, which means higher resolution.
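// Concretely, {{0, 1}, {}, {}} below sets the input gate bias scale to
// scale(input 0) * scale(input-to-input weights, input 1), the scale of
// the product accumulated in that gate.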
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{0, 1}, {}, {}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{0, 2}, {}, {}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{0, 3}, {}, {}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{0, 4}, {}, {}};
TensorProperty tensor_property_17;
tensor_property_17.use_derived_scale = true;
tensor_property_17.number_of_bits = 32;
tensor_property_17.derived_scale = {{16}, {4}, {}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{9, tensor_property_9},
{10, tensor_property_9},
{11, tensor_property_9},
{16, {}},
{19, tensor_property_19},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
{17, tensor_property_17},
};
property.outputs = {{0, {}}};
property.intermediates = {
// Without layer normalization, intermediate tensors 0, 1, 2, 3 are
// not used and their quantization parameters are ignored.
{0, {}},
{1, {}},
{2, {}},
{3, {}},
// Hidden state is quantized as usual.
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (!op_variant.use_layer_norm && op_variant.use_projection &&
!op_variant.use_peephole) {
// Without layer norm, we choose to quantize each gate bias with the
// scale of the input and its corresponding weights. The alternative
// would be the scale of the recurrent state and its corresponding
// weights; we use the smaller scale, which means higher resolution.
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{0, 1}, {}, {}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{0, 2}, {}, {}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{0, 3}, {}, {}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{0, 4}, {}, {}};
TensorProperty tensor_property_17;
tensor_property_17.use_derived_scale = true;
tensor_property_17.number_of_bits = 32;
tensor_property_17.derived_scale = {{16}, {4}, {}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{16, {}},
{19, tensor_property_19},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
{17, tensor_property_17},
};
property.outputs = {{0, {}}};
property.intermediates = {
// Without layer normalization, intermediate tensors 0, 1, 2, 3 are
// not used and their quantization parameters are ignored.
{0, {}},
{1, {}},
{2, {}},
{3, {}},
// Hidden state is quantized as usual.
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (!op_variant.use_layer_norm && !op_variant.use_projection &&
op_variant.use_peephole) {
TensorProperty tensor_property_9;
tensor_property_9.number_of_bits = 16;
tensor_property_9.symmetric = true;
// Without layer norm, we choose to quantize each gate bias with the
// scale of the input and its corresponding weights. The alternative
// would be the scale of the recurrent state and its corresponding
// weights; we use the smaller scale, which means higher resolution.
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{0, 1}, {}, {}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{0, 2}, {}, {}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{0, 3}, {}, {}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{0, 4}, {}, {}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{9, tensor_property_9},
{10, tensor_property_9},
{11, tensor_property_9},
{19, tensor_property_19},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
};
property.outputs = {{0, {}}};
property.intermediates = {
// Without layer normalization, intermediate tensors 0, 1, 2, 3 are
// not used and their quantization parameters are ignored.
{0, {}},
{1, {}},
{2, {}},
{3, {}},
// Without projection, the hidden state (intermediate 4), the output
// (0), and the input activation state (input 18) are the same, except
// that the very first input activation is not captured in the hidden
// state or the output.
// This is not an issue because this intermediate tensor is not used by
// the kernel and its quantization parameters are ignored.
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
if (!op_variant.use_layer_norm && !op_variant.use_projection &&
!op_variant.use_peephole) {
// Without layer norm, we choose to quantize each gate bias with the
// scale of the input and its corresponding weights. The alternative
// would be the scale of the recurrent state and its corresponding
// weights; we use the smaller scale, which means higher resolution.
TensorProperty tensor_property_12;
tensor_property_12.use_derived_scale = true;
tensor_property_12.number_of_bits = 32;
tensor_property_12.derived_scale = {{0, 1}, {}, {}};
TensorProperty tensor_property_13;
tensor_property_13.use_derived_scale = true;
tensor_property_13.number_of_bits = 32;
tensor_property_13.derived_scale = {{0, 2}, {}, {}};
TensorProperty tensor_property_14;
tensor_property_14.use_derived_scale = true;
tensor_property_14.number_of_bits = 32;
tensor_property_14.derived_scale = {{0, 3}, {}, {}};
TensorProperty tensor_property_15;
tensor_property_15.use_derived_scale = true;
tensor_property_15.number_of_bits = 32;
tensor_property_15.derived_scale = {{0, 4}, {}, {}};
TensorProperty tensor_property_19;
tensor_property_19.extend_to_power_of_two = true;
tensor_property_19.number_of_bits = 16;
tensor_property_19.state_tensor = true;
tensor_property_19.symmetric = true;
property.inputs = {
{0, {}},
{1, {}},
{2, {}},
{3, {}},
{4, {}},
{5, {}},
{6, {}},
{7, {}},
{8, {}},
{19, tensor_property_19},
{12, tensor_property_12},
{13, tensor_property_13},
{14, tensor_property_14},
{15, tensor_property_15},
};
property.outputs = {{0, {}}};
property.intermediates = {
// Without layer normalization, intermediate tensors 0, 1, 2, 3 are
// not used and their quantization parameters are ignored.
{0, {}},
{1, {}},
{2, {}},
{3, {}},
// Without projection, the hidden state (intermediate 4), the output
// (0), and the input activation state (input 18) are the same, except
// that the very first input activation is not captured in the hidden
// state or the output.
// This is not an issue because this intermediate tensor is not used by
// the kernel and its quantization parameters are ignored.
{4, {}},
};
property.restrict_scale = {{18, 0}};
property.version = 2;
}
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_L2_NORMALIZATION: {
property.inputs = {{0, {}}};
// L2 Norm requires output with 1/128 as scale and 0 as zero point.
TensorProperty tensor_property;
tensor_property.restriction = true;
tensor_property.restricted_value_int8 = {1 / 128.0f, 0};
property.outputs = {{0, tensor_property}};
property.version = 2;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_MAX_POOL_2D:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_MAXIMUM:
property.arbitrary_inputs = true;
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.quantize_input_as_activations = true;
property.version = 2;
break;
case BuiltinOperator_MEAN:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_MINIMUM:
property.arbitrary_inputs = true;
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.quantize_input_as_activations = true;
property.version = 2;
break;
case BuiltinOperator_MUL:
property.inputs = {{0, {}}, {1, {}}};
property.outputs = {{0, {}}};
property.quantize_input_as_activations = true;
property.version = 2;
break;
case BuiltinOperator_PACK:
property.arbitrary_inputs = true;
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_PAD:
case BuiltinOperator_PADV2:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_QUANTIZE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_PRELU:
property.inputs = {{0, {}}, {1, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = false;
property.version = 1;
property.quantizable_int16 = false;
break;
case BuiltinOperator_LEAKY_RELU:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_RELU:
case BuiltinOperator_RELU6:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_RELU_N1_TO_1:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 1;
property.quantizable_int16 = false;
break;
case BuiltinOperator_RESHAPE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 1;
break;
case BuiltinOperator_RESIZE_BILINEAR:
case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_REVERSE_V2:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 3;
break;
case BuiltinOperator_SCATTER_ND:
property.inputs = {{1, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 1;
break;
case BuiltinOperator_SELECT:
property.inputs = {{1, {}}, {2, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 1;
break;
case BuiltinOperator_SHAPE:
property.inputs = {{0, {}}};
// Shape has no quantizable output.
property.version = 1;
break;
case BuiltinOperator_SLICE:
// We skip inputs 1 and 2 since they aren't real valued (they are the
// index and size).
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_SQUEEZE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 1;
break;
case BuiltinOperator_SOFTMAX: {
property.inputs = {{0, {}}};
// Softmax requires output with 1/256 as scale and -128 as zero point.
TensorProperty tensor_property;
tensor_property.restriction = true;
tensor_property.restricted_value_int8 = {1 / 256.0f, -128};
tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
property.outputs = {{0, tensor_property}};
property.version = 2;
break;
}
case BuiltinOperator_STRIDED_SLICE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_SQUARED_DIFFERENCE:
case BuiltinOperator_SUB:
property.inputs = {{0, {}}, {1, {}}};
property.outputs = {{0, {}}};
property.version = 2;
property.quantize_input_as_activations = true;
break;
case BuiltinOperator_SUM:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_TANH: {
property.inputs = {{0, {}}};
// Tanh requires output with 1/128 as scale and 0 as zero point.
TensorProperty tensor_property;
tensor_property.restriction = true;
tensor_property.restricted_value_int8 = {1 / 128.0f, 0};
tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
property.outputs = {{0, tensor_property}};
property.version = 2;
break;
}
case BuiltinOperator_SVDF: {
TensorProperty tensor_property_time;
// Only 10 bits are needed because 6 bits are reserved for the reduce
// operation that follows the element-wise multiplication between the
// state and the time weights.
tensor_property_time.number_of_bits = 10;
TensorProperty tensor_property_bias;
tensor_property_bias.use_derived_scale = true;
tensor_property_bias.number_of_bits = 32;
tensor_property_bias.derived_scale = {{2, 4}, {}, {}};
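// {{2, 4}, {}, {}}: the bias scale is derived as scale(weights_time,
// input 2) * scale(state, input 4), the scale of the product the bias
// is added to.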
TensorProperty tensor_property_state;
tensor_property_state.number_of_bits = 16;
tensor_property_state.state_tensor = true;
property.inputs = {{0, {}},
{1, {}},
{2, tensor_property_time},
{4, tensor_property_state},
{3, tensor_property_bias}};
property.outputs = {{0, {}}};
property.version = 3;
property.quantizable_int16 = false;
break;
}
case BuiltinOperator_TILE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 3;
break;
case BuiltinOperator_TRANSPOSE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_UNPACK:
property.inputs = {{0, {}}};
property.arbitrary_outputs = true;
property.restrict_same_input_output_scale = true;
property.version = 1;
break;
case BuiltinOperator_MIRROR_PAD:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
property.quantizable_int16 = false;
break;
case BuiltinOperator_REDUCE_PROD:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 2;
break;
case BuiltinOperator_REDUCE_MAX:
case BuiltinOperator_REDUCE_MIN:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.version = 2;
break;
case BuiltinOperator_WHERE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
property.version = 1;
break;
case BuiltinOperator_ASSIGN_VARIABLE:
property.inputs = {{1, {}}};
property.quantize_input_as_activations = true;
property.version = 1;
break;
case BuiltinOperator_READ_VARIABLE:
property.outputs = {{0, {}}};
property.version = 1;
break;
case BuiltinOperator_VAR_HANDLE:
property.version = 1;
break;
default:
// No quantized implementation exists for this operation.
property.quantizable = false;
property.quantizable_int16 = false;
}
return property;
} // NOLINT(readability/fn_size)
// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_ops.td)
} // namespace operator_property
} // namespace optimize
} // namespace tflite