// blob: 59049360ca6b422ec788c98b30ababfc0d6d105a [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "Operations"
#include "SVDF.h"
#include <algorithm>
#include <cstddef>
#include <vector>
#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "Tracing.h"
namespace android {
namespace nn {
// Binds the operands of `operation` to this SVDF instance.
//
// The five tensor inputs and two outputs are stored as pointers into
// `operands`; the two scalar parameters (rank and fused activation) are read
// eagerly into `params_`, with 0 and kTfLiteActNone passed as the fallback
// values to getScalarDataWithDefault.
SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {
    NNTRACE_TRANS("SVDF::SVDF");

    // Outputs.
    output_ = GetOutput(operation, operands, kOutputTensor);
    state_out_ = GetOutput(operation, operands, kStateOutTensor);

    // Tensor inputs.
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    // Scalar parameters.
    const auto& rankInfo = *GetInput(operation, operands, kRankParam);
    params_.rank_ = getScalarDataWithDefault<int>(rankInfo, 0);

    const auto& activationInfo = *GetInput(operation, operands, kActivationParam);
    const int activationCode = getScalarDataWithDefault<int>(
            activationInfo, TfLiteFusedActivation::kTfLiteActNone);
    params_.activation_ = static_cast<TfLiteFusedActivation>(activationCode);
}
bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,
Shape* outputShape) {
NNTRACE_TRANS("SVDF::Prepare");
// Check we have all the inputs and outputs we need.
const int num_inputs = NumInputsWithValues(operation, operands);
NN_CHECK(num_inputs == 6 || num_inputs == 7);
constexpr int requiredInputs[] = {
kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,
kRankParam, kActivationParam,
};
for (const int requiredInput : requiredInputs) {
NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))
<< "required input " << requiredInput << " is omitted";
}
NN_CHECK_EQ(NumOutputs(operation), 2);
// Check that the scalar operands' buffers are large enough.
const auto& rankOperand = *GetInput(operation, operands, kRankParam);
NN_RET_CHECK(rankOperand.length >= sizeof(int));
const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
NN_RET_CHECK(activationOperand.length >= sizeof(int));
const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
const RunTimeOperandInfo* weights_feature =
GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
const RunTimeOperandInfo* weights_time =
GetInput(operation, operands, SVDF::kWeightsTimeTensor);
// Check all the parameters of tensor match within themselves and match the
// input configuration.
const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
const uint32_t batch_size = SizeOfDimension(input, 0);
const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
NN_CHECK_EQ(num_filters % rank, 0);
const uint32_t num_units = num_filters / rank;
const uint32_t memory_size = SizeOfDimension(weights_time, 1);
NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);
const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
if (!IsNullInput(bias)) {
NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
}
// Resize state.
const Shape& inputShape = input->shape();
stateShape->type = inputShape.type;
stateShape->dimensions = {batch_size, memory_size * num_filters};
stateShape->offset = inputShape.offset;
stateShape->scale = inputShape.scale;
// Resize output.
outputShape->type = inputShape.type;
outputShape->dimensions = {batch_size, num_units};
outputShape->offset = inputShape.offset;
outputShape->scale = inputShape.scale;
return true;
}
bool SVDF::Eval() {
NNTRACE_TRANS("SVDF::Eval");
switch (input_->type) {
case OperandType::TENSOR_FLOAT16: {
std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
&inputStateDataFloat32);
std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
if (!IsNullInput(bias_)) {
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
&biasDataFloat32);
}
std::vector<float> weightsFeatureDataFloat32(
getNumberOfElements(weights_feature_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
&weightsFeatureDataFloat32);
std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
&weightsTimeDataFloat32);
std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));
EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
weightsTimeDataFloat32.data(), outputDataFloat32.data(),
outputStateDataFloat32.data());
convertFloat32ToFloat16(outputDataFloat32,
reinterpret_cast<_Float16*>(output_->buffer));
convertFloat32ToFloat16(outputStateDataFloat32,
reinterpret_cast<_Float16*>(state_out_->buffer));
break;
}
case OperandType::TENSOR_FLOAT32: {
EvalFloat32(reinterpret_cast<float*>(input_->buffer),
reinterpret_cast<float*>(state_in_->buffer),
reinterpret_cast<float*>(bias_->buffer),
reinterpret_cast<float*>(weights_feature_->buffer),
reinterpret_cast<float*>(weights_time_->buffer),
reinterpret_cast<float*>(output_->buffer),
reinterpret_cast<float*>(state_out_->buffer));
break;
}
default: {
LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
return false;
}
}
return true;
}
// Float implementation of one SVDF step.
//
// Sizes are taken from the operands bound in the constructor:
//   batch_size  = input_ dim 0            input_size  = input_ dim 1
//   num_filters = weights_feature_ dim 0  memory_size = weights_time_ dim 1
//   num_units   = num_filters / rank      (rank must be non-zero; this
//                                          function cannot report an error)
//
// Parameters:
//   inputData          batch_size * input_size values.
//   inputStateData     batch_size * num_filters * memory_size values.
//   biasData           num_units values; only read when bias_ is not a null
//                      input.
//   weightsFeatureData num_filters * input_size values.
//   weightsTimeData    passed to the time-weight dot product together with
//                      each batch's num_filters * memory_size state block.
//   outputData         receives batch_size * num_units values.
//   outputStateData    receives batch_size * num_filters * memory_size values.
//
// Steps:
//   1. Copy the incoming state into the outgoing state.
//   2. Multiply the feature weights with the input and store the result in
//      the last slot of every (batch, filter) history.
//   3. Take the dot product of every history with the time weights.
//   4. Start the output from the bias (or zero), add the per-unit reduction
//      over `rank` filters, and apply the fused activation.
//   5. Shift every history one slot toward index 0 and zero its last slot.
void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                       const float* weightsFeatureData, const float* weightsTimeData,
                       float* outputData, float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

    // Element counts are computed in size_t so large tensors do not overflow
    // int arithmetic.
    const std::size_t filters_per_batch = static_cast<std::size_t>(num_filters);
    const std::size_t state_per_batch = filters_per_batch * memory_size;
    const std::size_t num_batch_filters = static_cast<std::size_t>(batch_size) * filters_per_batch;

    // Start from the previous state.
    std::copy_n(inputStateData, num_batch_filters * memory_size, outputStateData);

    // Compute conv1d(inputs, weights_feature).
    // The scratch buffer lives on the heap: its size depends on the tensors,
    // so a stack array could overflow the stack (and would be a non-standard
    // variable-length array). It starts at zero because the matmul is
    // accumulative.
    std::vector<float> scratch(num_batch_filters, 0.0f);
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(weightsFeatureData, num_filters,
                                                              input_size, inputData, batch_size,
                                                              scratch.data(), /*result_stride=*/1);

    // Copy the latest activation from scratch into the state: the last, i.e.
    // (memory_size-1)th, entry for each batch and filter. Every one of these
    // slots is overwritten here, so no separate zeroing pass is needed.
    for (std::size_t i = 0; i < num_batch_filters; ++i) {
        outputStateData[i * memory_size + memory_size - 1] = scratch[i];
    }

    // Begin ApplyTimeWeightsBiasAndActivation
    // Compute matmul(state, weights_time); scratch is reused for the result.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * state_per_batch;
        float* scratch_ptr_batch = scratch.data() + b * filters_per_batch;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch,
                /*result_stride=*/1);
    }

    // Initialize output with bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData);
    } else {
        std::fill_n(outputData, static_cast<std::size_t>(batch_size) * num_units, 0.0f);
    }

    // Reduction sum: each output unit gathers `rank` filters from scratch.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + static_cast<std::size_t>(b) * num_units;
        const float* scratch_ptr_batch = scratch.data() + b * filters_per_batch;
        tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units,
                                                 rank);
    }

    // Apply activation in place.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + static_cast<std::size_t>(b) * num_units;
        tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
                                                      params_.activation_, output_ptr_batch);
    }
    // Finished ApplyTimeWeightsBiasAndActivation

    // Shift the state: within each filter history, entries [1, memory_size)
    // move to [0, memory_size - 1), and the freed last slot is set to zero.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * state_per_batch;
        for (int f = 0; f < num_filters; f++) {
            std::copy(state_out_ptr_batch + 1, state_out_ptr_batch + memory_size,
                      state_out_ptr_batch);
            state_out_ptr_batch[memory_size - 1] = 0.0;
            state_out_ptr_batch += memory_size;
        }
    }
}
} // namespace nn
} // namespace android