/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/kernel_util.h"
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <complex>
#include <limits>
#include <memory>
#ifndef TF_LITE_STATIC_MEMORY
#include <string>
#endif // TF_LITE_STATIC_MEMORY
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#if defined(__APPLE__)
#include "TargetConditionals.h"
#endif
namespace tflite {
namespace {
// Assumes tensor_index is a valid index (in bounds)
inline TfLiteTensor* GetTensorAtIndex(const TfLiteContext* context,
int tensor_index) {
if (context->tensors != nullptr) {
return &context->tensors[tensor_index];
} else {
return context->GetTensor(context, tensor_index);
}
}
// Validate in a single place to reduce binary size
inline TfLiteStatus ValidateTensorIndexingSafe(const TfLiteContext* context,
int index, int max_size,
const int* tensor_indices,
int* tensor_index) {
if (index < 0 || index >= max_size) {
TF_LITE_KERNEL_LOG(const_cast<TfLiteContext*>(context),
"Invalid tensor index %d (not in [0, %d))\n", index,
max_size);
return kTfLiteError;
}
if (tensor_indices[index] == kTfLiteOptionalTensor) {
    TF_LITE_KERNEL_LOG(const_cast<TfLiteContext*>(context),
                       "Tensor at index %d was optional but was expected to "
                       "be present\n",
                       index);
return kTfLiteError;
}
*tensor_index = tensor_indices[index];
return kTfLiteOk;
}
// Same as above, but returns -1 for invalid inputs instead of returning a
// status and logging an error.
inline int ValidateTensorIndexing(const TfLiteContext* context, int index,
int max_size, const int* tensor_indices) {
if (index >= 0 && index < max_size) {
const int tensor_index = tensor_indices[index];
if (tensor_index != kTfLiteOptionalTensor) {
return tensor_index;
}
}
return -1;
}
inline TfLiteTensor* GetMutableInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
const int tensor_index = ValidateTensorIndexing(
context, index, node->inputs->size, node->inputs->data);
if (tensor_index < 0) {
return nullptr;
}
return GetTensorAtIndex(context, tensor_index);
}
inline TfLiteStatus GetMutableInputSafe(const TfLiteContext* context,
const TfLiteNode* node, int index,
const TfLiteTensor** tensor) {
int tensor_index;
TF_LITE_ENSURE_OK(
context, ValidateTensorIndexingSafe(context, index, node->inputs->size,
node->inputs->data, &tensor_index));
*tensor = GetTensorAtIndex(context, tensor_index);
return kTfLiteOk;
}
} // anonymous namespace.
const TfLiteTensor* GetInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
return GetMutableInput(context, node, index);
}
TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
int index, const TfLiteTensor** tensor) {
return GetMutableInputSafe(context, node, index, tensor);
}
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
int index) {
  TfLiteTensor* tensor = GetMutableInput(context, node, index);
  if (tensor == nullptr) return nullptr;
  return tensor->is_variable ? tensor : nullptr;
}
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
int index) {
const int tensor_index = ValidateTensorIndexing(
context, index, node->outputs->size, node->outputs->data);
if (tensor_index < 0) {
return nullptr;
}
return GetTensorAtIndex(context, tensor_index);
}
TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node,
int index, TfLiteTensor** tensor) {
int tensor_index;
TF_LITE_ENSURE_OK(
context, ValidateTensorIndexingSafe(context, index, node->outputs->size,
node->outputs->data, &tensor_index));
*tensor = GetTensorAtIndex(context, tensor_index);
return kTfLiteOk;
}
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
const TfLiteNode* node, int index) {
return GetInput(context, node, index);
}
#ifndef TF_LITE_STATIC_MEMORY
TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node,
int index) {
const int tensor_index = ValidateTensorIndexing(
context, index, node->temporaries->size, node->temporaries->data);
if (tensor_index < 0) {
return nullptr;
}
return GetTensorAtIndex(context, tensor_index);
}
TfLiteStatus GetTemporarySafe(const TfLiteContext* context,
const TfLiteNode* node, int index,
TfLiteTensor** tensor) {
int tensor_index;
TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe(
context, index, node->temporaries->size,
node->temporaries->data, &tensor_index));
*tensor = GetTensorAtIndex(context, tensor_index);
return kTfLiteOk;
}
const TfLiteTensor* GetIntermediates(TfLiteContext* context,
const TfLiteNode* node, int index) {
const int tensor_index = ValidateTensorIndexing(
context, index, node->intermediates->size, node->intermediates->data);
if (tensor_index < 0) {
return nullptr;
}
return GetTensorAtIndex(context, tensor_index);
}
TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context,
const TfLiteNode* node, int index,
TfLiteTensor** tensor) {
int tensor_index;
TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe(
context, index, node->intermediates->size,
node->intermediates->data, &tensor_index));
*tensor = GetTensorAtIndex(context, tensor_index);
return kTfLiteOk;
}
#endif // TF_LITE_STATIC_MEMORY
// Per-axis
TfLiteStatus PopulateConvolutionQuantizationParams(
TfLiteContext* context, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int32_t* per_channel_shift) {
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
return PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, activation, multiplier, shift,
output_activation_min, output_activation_max, per_channel_multiplier,
per_channel_shift, affine_quantization->scale->size);
}
// Per-axis & per-tensor
TfLiteStatus PopulateConvolutionQuantizationParams(
TfLiteContext* context, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int32_t* per_channel_shift,
int num_channels) {
TF_LITE_ENSURE_EQ(context, input->quantization.type,
kTfLiteAffineQuantization);
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
// TODO(jianlijianli): Enable bias type check and bias scale == input scale
// * filter scale for each channel in affine quantization once bias
// quantization is properly populated.
// TF_LITE_ENSURE_EQ(context, bias->quantization.type,
// kTfLiteAffineQuantization);
// Check data type.
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
const bool is_per_channel = affine_quantization->scale->size > 1;
if (is_per_channel) {
// Currently only Int8/Int16 is supported for per channel quantization.
TF_LITE_ENSURE(context,
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
TF_LITE_ENSURE_EQ(
context, num_channels,
filter->dims->data[affine_quantization->quantized_dimension]);
}
// Populate multiplier and shift using affine quantization.
const float input_scale = input->params.scale;
const float output_scale = output->params.scale;
const float* filter_scales = affine_quantization->scale->data;
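  // Worked example with hypothetical values (not taken from any particular
  // model): input_scale = 0.5, filter_scale = 0.25 and output_scale = 0.125
  // give an effective output scale of 0.5 * 0.25 / 0.125 = 1.0 for that
  // channel, which QuantizeMultiplier then encodes as an integer significand
  // plus a power-of-two shift for use by the integer kernels.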
for (int i = 0; i < num_channels; ++i) {
    // If a per-tensor quantization parameter is specified, broadcast it along
    // the quantization dimension (channels_out).
const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
const double filter_scale = static_cast<double>(scale);
const double effective_output_scale = static_cast<double>(input_scale) *
filter_scale /
static_cast<double>(output_scale);
int32_t significand;
int channel_shift;
QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
per_channel_multiplier[i] = significand;
per_channel_shift[i] = channel_shift;
}
// Populate scalar quantization parameters.
// This check on legacy quantization parameters is kept only for backward
// compatibility.
if (input->type == kTfLiteUInt8) {
// Check bias scale == input scale * filter scale.
double real_multiplier = 0.0;
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
context, input, filter, bias, output, &real_multiplier));
int exponent;
// Populate quantization parameters with multiplier and shift.
QuantizeMultiplier(real_multiplier, multiplier, &exponent);
*shift = -exponent;
}
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt16) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, activation, output, output_activation_min,
output_activation_max));
}
return kTfLiteOk;
}
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output,
double* multiplier) {
const double input_product_scale = static_cast<double>(input->params.scale) *
static_cast<double>(filter->params.scale);
// The following conditions must be guaranteed by the training pipeline.
if (bias) {
const double bias_scale = static_cast<double>(bias->params.scale);
// Here we're making sure the input_product_scale & bias_scale are about the
// same. Since we have:
// (output - output_zp) * output_scale =
// input_product_scale * input_product + bias * bias_scale ---- (0)
//
// (0) equals:
// (input_product + bias) * input_product_scale ----- (1)
// +
// bias * (bias_scale - input_product_scale) ------ (2)
//
    // For the real kernel computation, we're doing (1), so we really need to
    // make sure (2) has a minimal impact on the output; that is,
    // bias * (bias_scale - input_product_scale) / output_scale should be
    // negligible once rounded to an integer.
    // Since bias values normally lie within a small range, we expect
    // (bias_scale - input_product_scale) / output_scale to be a small number,
    // e.g. 0.02.
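    // As a hypothetical numeric check (values chosen only for illustration):
    // input_scale = 0.5 and filter_scale = 0.1 give input_product_scale =
    // 0.05; with bias_scale = 0.0508 and output_scale = 0.04, scale_diff /
    // output_scale = 0.0008 / 0.04 = 0.02, which sits right at the tolerance
    // enforced below.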
const double scale_diff = std::abs(input_product_scale - bias_scale);
const double output_scale = static_cast<double>(output->params.scale);
TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
}
return GetQuantizedConvolutionMultipler(context, input, filter, output,
multiplier);
}
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
const TfLiteTensor* input,
const TfLiteTensor* filter,
TfLiteTensor* output,
double* multiplier) {
const double input_product_scale =
static_cast<double>(input->params.scale * filter->params.scale);
TF_LITE_ENSURE(context, input_product_scale >= 0);
*multiplier = input_product_scale / static_cast<double>(output->params.scale);
return kTfLiteOk;
}
namespace {
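// Quantizes the float value `f` using the given scale and zero point, writing
// the result to `q`. Returns an error if the rounded value would not fit in
// an int32.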
inline TfLiteStatus Quantize(TfLiteContext* context, float scale,
int32_t zero_point, float f, int32_t& q) {
const float tmp = TfLiteRound(f / scale);
const bool no_integer_overflow_from_quantization =
(tmp >= static_cast<float>(std::numeric_limits<int32_t>::min()) &&
tmp <= static_cast<float>(std::numeric_limits<int32_t>::max()));
TF_LITE_ENSURE(context, no_integer_overflow_from_quantization);
q = zero_point + static_cast<int32_t>(tmp);
return kTfLiteOk;
}
TfLiteStatus CalculateActivationRangeQuantizedImpl(
TfLiteContext* context, TfLiteFusedActivation activation, int32_t qmin,
int32_t qmax, TfLiteTensor* output, int32_t* act_min, int32_t* act_max) {
const auto scale = output->params.scale;
const auto zero_point = output->params.zero_point;
int32_t tmp_q;
if (activation == kTfLiteActRelu) {
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 0.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
*act_max = qmax;
} else if (activation == kTfLiteActRelu6) {
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 0.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 6.0, tmp_q));
*act_max = std::min(qmax, tmp_q);
} else if (activation == kTfLiteActReluN1To1) {
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, -1.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 1.0, tmp_q));
*act_max = std::min(qmax, tmp_q);
} else {
*act_min = qmin;
*act_max = qmax;
}
return kTfLiteOk;
}
} // namespace
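// Illustrative example (values chosen for exposition, not from any model):
// for an int8 output with scale = 0.1 and zero_point = 0, a fused Relu6
// narrows the quantized range [-128, 127] to [0, 60], since 0.0 and 6.0
// quantize to 0 and 60 respectively.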
TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
TfLiteFusedActivation activation,
TfLiteTensor* output,
int32_t* act_min,
int32_t* act_max) {
int32_t qmin = 0;
int32_t qmax = 0;
if (output->type == kTfLiteUInt8) {
qmin = std::numeric_limits<uint8_t>::min();
qmax = std::numeric_limits<uint8_t>::max();
} else if (output->type == kTfLiteInt8) {
qmin = std::numeric_limits<int8_t>::min();
qmax = std::numeric_limits<int8_t>::max();
} else if (output->type == kTfLiteInt16) {
qmin = std::numeric_limits<int16_t>::min();
qmax = std::numeric_limits<int16_t>::max();
} else {
TF_LITE_ENSURE(context, false);
}
return CalculateActivationRangeQuantizedImpl(context, activation, qmin, qmax,
output, act_min, act_max);
}
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
return TfLiteIntArrayEqual(input1->dims, input2->dims);
}
#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/172067338): Having this function be part of the TF_LITE_STATIC_MEMORY
// build results in a 6KB size increase, even though the function is unused for
// that build. What appears to be happening is that while the linker drops the
// unused function, the string library that gets pulled in is not dropped,
// resulting in the increased binary size.
std::string GetShapeDebugString(const TfLiteIntArray* shape) {
std::string str;
for (int d = 0; d < shape->size; ++d) {
if (str.empty())
str = "[" + std::to_string(shape->data[d]);
else
str += ", " + std::to_string(shape->data[d]);
}
str += "]";
return str;
}
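// The broadcast rule below follows NumPy-style trailing-dimension alignment.
// As an illustration, shapes [8, 1, 6, 1] and [7, 1, 5] broadcast to
// [8, 7, 6, 5], while [3, 2] and [4] are rejected because 2 and 4 are
// incompatible.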
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteIntArray** output_shape) {
int dims1 = NumDimensions(input1);
int dims2 = NumDimensions(input2);
int out_dims = std::max(dims1, dims2);
if (NumElements(input1) == 0) {
*output_shape = TfLiteIntArrayCopy(input1->dims);
return kTfLiteOk;
}
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
for (int i = 0; i < out_dims; ++i) {
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
if (!(d1 == d2 || d1 == 1 || d2 == 1)) {
context->ReportError(context,
"Given shapes, %s and %s, are not broadcastable.",
GetShapeDebugString(input1->dims).c_str(),
GetShapeDebugString(input2->dims).c_str());
return kTfLiteError;
}
shape->data[out_dims - i - 1] = std::max(d1, d2);
}
*output_shape = shape.release();
return kTfLiteOk;
}
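// The three-input overload applies the same alignment rule across all three
// shapes; for instance, [2, 1], [1, 3] and [1] (illustrative) broadcast to
// [2, 3].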
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
const TfLiteTensor* input3,
TfLiteIntArray** output_shape) {
int dims1 = NumDimensions(input1);
int dims2 = NumDimensions(input2);
int dims3 = NumDimensions(input3);
int out_dims = std::max(std::max(dims1, dims2), dims3);
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
for (int i = 0; i < out_dims; ++i) {
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
int max_value = std::max(std::max(d1, d2), d3);
if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) ||
!(d3 == 1 || d3 == max_value)) {
context->ReportError(
context, "Given shapes, %s, %s and %s, are not broadcastable.",
GetShapeDebugString(input1->dims).c_str(),
GetShapeDebugString(input2->dims).c_str(),
GetShapeDebugString(input3->dims).c_str());
return kTfLiteError;
}
shape->data[out_dims - i - 1] = max_value;
}
*output_shape = shape.release();
return kTfLiteOk;
}
#endif // TF_LITE_STATIC_MEMORY
// The size of a string is not constant, so return 0 in that case.
int TfLiteTypeGetSize(TfLiteType type) {
switch (type) {
case kTfLiteUInt8:
TF_LITE_ASSERT_EQ(sizeof(uint8_t), 1);
return 1;
case kTfLiteInt8:
TF_LITE_ASSERT_EQ(sizeof(int8_t), 1);
return 1;
case kTfLiteBool:
return sizeof(bool);
case kTfLiteInt16:
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
return 2;
case kTfLiteFloat16:
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
return 2;
case kTfLiteFloat32:
TF_LITE_ASSERT_EQ(sizeof(float), 4);
return 4;
case kTfLiteInt32:
TF_LITE_ASSERT_EQ(sizeof(int32_t), 4);
return 4;
case kTfLiteUInt32:
TF_LITE_ASSERT_EQ(sizeof(uint32_t), 4);
return 4;
case kTfLiteInt64:
TF_LITE_ASSERT_EQ(sizeof(int64_t), 8);
return 8;
case kTfLiteUInt64:
TF_LITE_ASSERT_EQ(sizeof(uint64_t), 8);
return 8;
case kTfLiteFloat64:
TF_LITE_ASSERT_EQ(sizeof(double), 8);
return 8;
case kTfLiteComplex64:
TF_LITE_ASSERT_EQ(sizeof(std::complex<float>), 8);
return 8;
case kTfLiteComplex128:
TF_LITE_ASSERT_EQ(sizeof(std::complex<double>), 16);
return 16;
default:
return 0;
}
}
bool IsMobilePlatform() {
#if defined(ANDROID) || defined(__ANDROID__)
return true;
#elif defined(__APPLE__)
#if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE
return true;
#endif
#endif
return false;
}
} // namespace tflite