blob: 844361da28dbb2c0a9b17afbccf95b81e7be1bc1 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "Operations"
#include "SVDF.h"
#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"
#include "Tracing.h"
namespace android {
namespace nn {
using namespace hal;
// Binds this SVDF instance to the operands of `operation`.
//
// Stores pointers to the input/weight/bias/state operands and to both output
// operands, and reads the two scalar parameters (rank and fused activation)
// into params_. No validation is performed here; see Prepare().
SVDF::SVDF(const Operation& operation, std::vector<RunTimeOperandInfo>& operands) {
    NNTRACE_TRANS("SVDF::SVDF");

    // Tensor inputs.
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    // Scalar parameters: both are stored as int in their operand buffers.
    const int rankValue = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    params_.rank_ = rankValue;

    const int activationValue =
            getScalarData<int>(*GetInput(operation, operands, kActivationParam));
    params_.activation_ = static_cast<TfLiteFusedActivation>(activationValue);

    // Outputs.
    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);
}
bool SVDF::Prepare(const Operation &operation,
std::vector<RunTimeOperandInfo> &operands,
Shape *stateShape,
Shape *outputShape) {
NNTRACE_TRANS("SVDF::Prepare");
// Check we have all the inputs and outputs we need.
const int num_inputs = NumInputsWithValues(operation, operands);
NN_CHECK(num_inputs == 6 || num_inputs == 7);
NN_CHECK_EQ(NumOutputs(operation), 2);
const RunTimeOperandInfo *input =
GetInput(operation, operands, SVDF::kInputTensor);
const RunTimeOperandInfo *weights_feature =
GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
const RunTimeOperandInfo *weights_time =
GetInput(operation, operands, SVDF::kWeightsTimeTensor);
// Check all the parameters of tensor match within themselves and match the
// input configuration.
const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
const uint32_t batch_size = SizeOfDimension(input, 0);
const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
NN_CHECK_EQ(num_filters % rank, 0);
const uint32_t num_units = num_filters / rank;
const uint32_t memory_size = SizeOfDimension(weights_time, 1);
NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);
const RunTimeOperandInfo *bias =
GetInput(operation, operands, kBiasTensor);
if (!IsNullInput(bias)) {
NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
}
// Resize state.
const Shape &inputShape = input->shape();
stateShape->type = inputShape.type;
stateShape->dimensions = { batch_size, memory_size * num_filters };
stateShape->offset = inputShape.offset;
stateShape->scale = inputShape.scale;
// Resize output.
outputShape->type = inputShape.type;
outputShape->dimensions = { batch_size, num_units };
outputShape->offset = inputShape.offset;
outputShape->scale = inputShape.scale;
return true;
}
bool SVDF::Eval() {
NNTRACE_TRANS("SVDF::Eval");
switch (input_->type) {
case OperandType::TENSOR_FLOAT16: {
std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
&inputStateDataFloat32);
std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
if (!IsNullInput(bias_)) {
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
&biasDataFloat32);
}
std::vector<float> weightsFeatureDataFloat32(
getNumberOfElements(weights_feature_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
&weightsFeatureDataFloat32);
std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
&weightsTimeDataFloat32);
std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));
EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
weightsTimeDataFloat32.data(), outputDataFloat32.data(),
outputStateDataFloat32.data());
convertFloat32ToFloat16(outputDataFloat32,
reinterpret_cast<_Float16*>(output_->buffer));
convertFloat32ToFloat16(outputStateDataFloat32,
reinterpret_cast<_Float16*>(state_out_->buffer));
break;
}
case OperandType::TENSOR_FLOAT32: {
EvalFloat32(reinterpret_cast<float*>(input_->buffer),
reinterpret_cast<float*>(state_in_->buffer),
reinterpret_cast<float*>(bias_->buffer),
reinterpret_cast<float*>(weights_feature_->buffer),
reinterpret_cast<float*>(weights_time_->buffer),
reinterpret_cast<float*>(output_->buffer),
reinterpret_cast<float*>(state_out_->buffer));
break;
}
default: {
LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
return false;
}
}
return true;
}
// Float implementation of SVDF.
//
// Parameters (all dense float buffers; sizes are derived from the operands
// captured by the constructor):
//   inputData          batch_size x input_size
//   inputStateData     batch_size x (memory_size * num_filters), read only
//   biasData           num_units values; only read when bias_ is not a null input
//   weightsFeatureData num_filters x input_size
//   weightsTimeData    num_filters x memory_size
//   outputData         batch_size x num_units, written
//   outputStateData    batch_size x (memory_size * num_filters), written
//
// The state is laid out per batch as num_filters consecutive rows of
// memory_size floats. The exact semantics of the tflite::tensor_utils helpers
// are defined outside this file; the step comments below describe how they
// are invoked here.
void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                       const float* weightsFeatureData, const float* weightsTimeData,
                       float* outputData, float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

    // Start from a copy of the incoming state; everything below works on the
    // output state buffer only.
    memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);

    // Compute conv1d(inputs, weights_feature).
    // First clear the last slot of every filter row so that the multiply-
    // accumulate below starts from zero.
    for (int b = 0; b < batch_size; b++) {
        float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int c = 0; c < num_filters; c++) {
            float* state_ptr = state_ptr_batch + c * memory_size;
            state_ptr[memory_size - 1] = 0.0;
        }
    }

    // The right most column of the state (index memory_size - 1 of each filter
    // row) receives the current cycle activation. This is achieved by starting
    // at outputStateData[memory_size - 1] and having the stride equal to
    // memory_size.
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            weightsFeatureData, num_filters, input_size, inputData, batch_size,
            &outputStateData[memory_size - 1], memory_size);

    // Compute matmul(state, weights_time).
    // For every batch, each filter's state row is dotted with the matching
    // weights_time row; the num_filters results are stored contiguously
    // (result_stride 1) in a scratch buffer.
    //
    // The scratch buffer lives on the heap: its size depends on the model's
    // dimensions, and a variable-length stack array is both a non-standard
    // C++ extension and an unbounded stack allocation.
    std::vector<float> scratch(static_cast<size_t>(batch_size) * num_filters);
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch,
                /*result_stride=*/1);
    }

    // Initialize output with bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData);
    } else {
        tflite::tensor_utils::ZeroVector(outputData, batch_size * num_units);
    }

    // Reduction sum: fold the num_filters scratch values of each batch into
    // num_units outputs, rank values per unit.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        const float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units,
                                                 rank);
    }

    // Apply activation (in place).
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
                                                      params_.activation_, output_ptr_batch);
    }

    // Shift every filter row of the state left by one slot, with 0 as the
    // shift-in value, to make room for the next cycle's activation.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int f = 0; f < num_filters; f++) {
            tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size,
                                                  /*shift_value=*/0.0);
            state_out_ptr_batch += memory_size;
        }
    }
}
} // namespace nn
} // namespace android