Remove scratch buffers and redundant output from quantized LSTM
* Remove scratch buffers
* Remove redundant state output
* Update tests
Test: NeuralNetworksTest_operations
Test: NeuralNetworksTest_static
Bug: 113561892
Change-Id: I93cb7567b78c10e2c2c75c1b5ada80516b427706
Merged-In: I93cb7567b78c10e2c2c75c1b5ada80516b427706
(cherry picked from commit 42fc2f949317f2941eb0c626db0fe4f5d5817b32)
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index ea51177..e321475 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -2503,29 +2503,19 @@
outShape);
} break;
case OperationType::QUANTIZED_16BIT_LSTM: {
- if (!allParametersPresent(5, 5)) {
+ if (!allParametersPresent(5, 2)) {
return ANEURALNETWORKS_BAD_DATA;
}
- RunTimeOperandInfo& concatTemp = mOperands[outs[QuantizedLSTMCell::kConcatTempTensor]];
- RunTimeOperandInfo& activationTemp =
- mOperands[outs[QuantizedLSTMCell::kActivationTempTensor]];
- RunTimeOperandInfo& outputStateOut =
- mOperands[outs[QuantizedLSTMCell::kOutputStateOutTensor]];
RunTimeOperandInfo& cellStateOut =
mOperands[outs[QuantizedLSTMCell::kCellStateOutTensor]];
RunTimeOperandInfo& output = mOperands[outs[QuantizedLSTMCell::kOutputTensor]];
- Shape concatTempShape, activationTempShape, outputStateOutShape, cellStateOutShape,
- outputShape;
+ Shape cellStateOutShape, outputShape;
QuantizedLSTMCell quantizedLSTMCell(operation, mOperands);
- success = QuantizedLSTMCell::prepare(operation, mOperands, &concatTempShape,
- &activationTempShape, &outputStateOutShape,
- &cellStateOutShape, &outputShape) &&
- setInfoAndAllocateIfNeeded(&concatTemp, concatTempShape) &&
- setInfoAndAllocateIfNeeded(&activationTemp, activationTempShape) &&
- setInfoAndAllocateIfNeeded(&outputStateOut, outputStateOutShape) &&
+ success = QuantizedLSTMCell::prepare(operation, mOperands, &cellStateOutShape,
+ &outputShape) &&
setInfoAndAllocateIfNeeded(&cellStateOut, cellStateOutShape) &&
setInfoAndAllocateIfNeeded(&output, outputShape) && quantizedLSTMCell.eval();
} break;
diff --git a/common/Utils.cpp b/common/Utils.cpp
index a26ff1e..45c7bd5 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -1581,8 +1581,8 @@
outExpectedTypes);
}
case ANEURALNETWORKS_QUANTIZED_16BIT_LSTM: {
- if (inputCount != 5 || outputCount != 5) {
- logInvalidInOutNumber(5, 5);
+ if (inputCount != 5 || outputCount != 2) {
+ logInvalidInOutNumber(5, 2);
return ANEURALNETWORKS_BAD_DATA;
}
NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_2));
@@ -1590,10 +1590,8 @@
OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT8_ASYMM,
OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_INT32,
OperandType::TENSOR_QUANT16_SYMM};
- std::vector<OperandType> outExpectedTypes = {
- OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_SYMM,
- OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_SYMM,
- OperandType::TENSOR_QUANT8_ASYMM};
+ std::vector<OperandType> outExpectedTypes = {OperandType::TENSOR_QUANT16_SYMM,
+ OperandType::TENSOR_QUANT8_ASYMM};
return validateOperationOperandTypes(operands, inputCount, inputIndexes,
inExpectedTypes, outputCount, outputIndexes,
outExpectedTypes);
diff --git a/common/operations/QuantizedLSTM.cpp b/common/operations/QuantizedLSTM.cpp
index 60ec091..bcbf91b 100644
--- a/common/operations/QuantizedLSTM.cpp
+++ b/common/operations/QuantizedLSTM.cpp
@@ -55,8 +55,8 @@
const Dims<4>& bias_dims, const int16_t* prevCellState_data_int16,
const Dims<4>& prevCellState_dims, int16_t* output_state_data_int16,
const Dims<4>& output_state_dims, uint8_t* output_activ_data_uint8,
- const Dims<4>& output_activ_dims, uint8_t* concatTemp_data_uint8,
- const Dims<4>& concatTemp_dims, int16_t* activ_temp_data_int16,
+ const Dims<4>& output_activ_dims, uint8_t* concat_temp_data_uint8,
+ const Dims<4>& concat_temp_dims, int16_t* activ_temp_data_int16,
const Dims<4>& activ_temp_dims, int32_t weights_zero_point,
int32_t accum_multiplier, int accum_shift) {
// Gather dimensions information, and perform consistency checks.
@@ -90,7 +90,7 @@
Dims<4> const* concat_input_arrays_dims[2] = {&input_dims, &prev_activ_dims};
tflite::reference_ops::Concatenation<tflite::FusedActivationFunctionType::kNone, uint8_t>(
0, concat_input_arrays_data, concat_input_arrays_dims, 2,
- concatTemp_data_uint8, concatTemp_dims);
+ concat_temp_data_uint8, concat_temp_dims);
// Implementation of the fully connected node inside the LSTM cell.
// The operands are 8-bit integers, the accumulators are internally 32bit
@@ -104,7 +104,7 @@
int32_t accum = bias_data_int32[out_c];
// Accumulation loop.
for (int d = 0; d < fc_accum_depth; ++d) {
- int16_t input_val = concatTemp_data_uint8[b * fc_accum_depth + d] - 128;
+ int16_t input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
int16_t weights_val =
weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
accum += input_val * weights_val;
@@ -207,17 +207,13 @@
bias_ = GetInput(operation, operands, kBiasTensor);
prevCellState_ = GetInput(operation, operands, kPrevCellStateTensor);
- concatTemp_ = GetOutput(operation, operands, kConcatTempTensor);
- activationTemp_ = GetOutput(operation, operands, kActivationTempTensor);
- outputStateOut_ = GetOutput(operation, operands, kOutputStateOutTensor);
cellStateOut_ = GetOutput(operation, operands, kCellStateOutTensor);
output_ = GetOutput(operation, operands, kOutputTensor);
}
bool QuantizedLSTMCell::prepare(const Operation& operation,
- std::vector<RunTimeOperandInfo>& operands, Shape* concatTempShape,
- Shape* activationTempShape, Shape* outputStateOutShape,
- Shape* cellStateOutShape, Shape* outputShape) {
+ std::vector<RunTimeOperandInfo>& operands, Shape* cellStateOutShape,
+ Shape* outputShape) {
auto input = GetInput(operation, operands, kInputTensor);
NN_CHECK_EQ(NumDimensions(input), 2);
NN_CHECK_EQ(input->scale, 1. / 128.0);
@@ -264,15 +260,6 @@
// We only support StateIntegerBits == 4
NN_CHECK(stateIntegerBits == 4);
- *concatTempShape = input->shape();
- concatTempShape->dimensions[1] = totalDepth;
-
- activationTempShape->type = OperandType::TENSOR_QUANT16_SYMM;
- activationTempShape->dimensions = {numBatches, 4 * activationDepth};
- activationTempShape->scale = prevOutput->scale;
- activationTempShape->offset = prevOutput->zeroPoint;
-
- *outputStateOutShape = prevOutput->shape();
*cellStateOutShape = prevCellState->shape();
*outputShape = prevOutput->shape();
return true;
@@ -280,6 +267,18 @@
bool QuantizedLSTMCell::eval() {
NNTRACE_COMP("QuantizedLSTM::eval");
+
+ Shape concatTempShape;
+ concatTempShape.dimensions = {SizeOfDimension(input_, 0),
+ SizeOfDimension(input_, 1) + SizeOfDimension(prevOutput_, 1)};
+
+ Shape activationTempShape;
+ activationTempShape.dimensions = {SizeOfDimension(input_, 0),
+ 4 * getSizeOfDimension(concatTempShape, 1)};
+
+ std::vector<uint8_t> concatTemp(getNumberOfElements(concatTempShape));
+ std::vector<int16_t> activationTemp(getNumberOfElements(activationTempShape));
+
// From https://arxiv.org/pdf/1712.05877, for a fully-connected layer,
// accumulator multiplier is equal to:
// (input scale) * (weights scale) / (fully-connected output scale)
@@ -301,15 +300,10 @@
GetBuffer<const int16_t>(prevCellState_), convertShapeToDims(prevCellState_->shape()),
// Outputs.
GetBuffer<int16_t>(cellStateOut_), convertShapeToDims(cellStateOut_->shape()),
- GetBuffer<uint8_t>(output_), convertShapeToDims(output_->shape()),
- GetBuffer<uint8_t>(concatTemp_), convertShapeToDims(concatTemp_->shape()),
- GetBuffer<int16_t>(activationTemp_), convertShapeToDims(activationTemp_->shape()),
- weights_->zeroPoint, accumMultiplier, accumShift);
- // Copy output_ to outputStateOut_ to use it in the next time step
- const int flat_output_size = output_->shape().dimensions[0] * output_->shape().dimensions[1];
- memcpy(outputStateOut_->buffer, output_->buffer, flat_output_size);
- outputStateOut_->scale = output_->scale;
- outputStateOut_->zeroPoint = output_->zeroPoint;
+ GetBuffer<uint8_t>(output_), convertShapeToDims(output_->shape()), concatTemp.data(),
+ convertShapeToDims(concatTempShape), activationTemp.data(),
+ convertShapeToDims(activationTempShape), weights_->zeroPoint, accumMultiplier,
+ accumShift);
return true;
}
diff --git a/common/operations/QuantizedLSTM.h b/common/operations/QuantizedLSTM.h
index 4e85ddf..6754b49 100644
--- a/common/operations/QuantizedLSTM.h
+++ b/common/operations/QuantizedLSTM.h
@@ -2,6 +2,7 @@
#define FRAMEWORKS_ML_NN_QUANTIZEDLSTM_H
#include "HalOperation.h"
+#include "OperationsUtils.h"
#include <vector>
@@ -9,7 +10,6 @@
namespace nn {
struct RunTimeOperandInfo;
-struct Shape;
class QuantizedLSTMCell {
public:
@@ -17,8 +17,7 @@
std::vector<RunTimeOperandInfo>& operands);
static bool prepare(const android::hardware::neuralnetworks::V1_2::Operation& operation,
- std::vector<RunTimeOperandInfo>& operands, Shape* concatTempShape,
- Shape* activationTempShape, Shape* outputStateShape, Shape* cellStateShape,
+ std::vector<RunTimeOperandInfo>& operands, Shape* cellStateShape,
Shape* outputShape);
bool eval();
@@ -29,11 +28,8 @@
static constexpr int kBiasTensor = 3;
static constexpr int kPrevCellStateTensor = 4;
// Outputs:
- static constexpr int kConcatTempTensor = 0;
- static constexpr int kActivationTempTensor = 1;
- static constexpr int kOutputStateOutTensor = 2;
- static constexpr int kCellStateOutTensor = 3;
- static constexpr int kOutputTensor = 4;
+ static constexpr int kCellStateOutTensor = 0;
+ static constexpr int kOutputTensor = 1;
private:
const RunTimeOperandInfo* input_;
@@ -42,9 +38,6 @@
const RunTimeOperandInfo* bias_;
const RunTimeOperandInfo* prevCellState_;
- RunTimeOperandInfo* concatTemp_;
- RunTimeOperandInfo* activationTemp_;
- RunTimeOperandInfo* outputStateOut_;
RunTimeOperandInfo* cellStateOut_;
RunTimeOperandInfo* output_;
};
diff --git a/common/operations/QuantizedLSTMTest.cpp b/common/operations/QuantizedLSTMTest.cpp
index 73e87cf..a25b00f 100644
--- a/common/operations/QuantizedLSTMTest.cpp
+++ b/common/operations/QuantizedLSTMTest.cpp
@@ -55,27 +55,15 @@
// clang-format on
std::vector<std::vector<uint32_t>> output_shapes = {
- {numBatches, inputSize + outputSize}, // concatTemp
- {numBatches, 4 * cellSize}, // activationTemp
- {numBatches, outputSize}, // outputStateOut
{numBatches, cellSize}, // cellStateOut
{numBatches, outputSize}}; // output
std::vector<uint32_t> outputs;
// clang-format off
- OperandType concatTempOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[0],
- 1. / 128., 128);
- outputs.push_back(model_.addOperand(&concatTempOperandType));
- OperandType activationTempOperandType(Type::TENSOR_QUANT16_SYMM, output_shapes[1],
- 1. / 128., 0);
- outputs.push_back(model_.addOperand(&activationTempOperandType));
- OperandType outputStateOutOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[2],
- 1. / 128., 128);
- outputs.push_back(model_.addOperand(&outputStateOutOperandType));
- OperandType cellStateOutOperandType(Type::TENSOR_QUANT16_SYMM, output_shapes[3],
+ OperandType cellStateOutOperandType(Type::TENSOR_QUANT16_SYMM, output_shapes[0],
1. / 2048., 0);
outputs.push_back(model_.addOperand(&cellStateOutOperandType));
- OperandType outputOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[4],
+ OperandType outputOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[1],
1. / 128., 128);
outputs.push_back(model_.addOperand(&outputOperandType));
// clang-format on
@@ -87,11 +75,8 @@
prevOutput_.insert(prevOutput_.end(), numBatches * outputSize, 128);
prevCellState_.insert(prevCellState_.end(), numBatches * cellSize, 0);
- reserveOutputTensor(&concatTemp_, output_shapes[0]);
- reserveOutputTensor(&activationTemp_, output_shapes[1]);
- reserveOutputTensor(&outputStateOut_, output_shapes[2]);
- reserveOutputTensor(&cellStateOut_, output_shapes[3]);
- reserveOutputTensor(&output_, output_shapes[4]);
+ reserveOutputTensor(&cellStateOut_, output_shapes[0]);
+ reserveOutputTensor(&output_, output_shapes[1]);
model_.finish();
}
@@ -116,14 +101,6 @@
setInputTensor(&execution, QuantizedLSTMCell::kPrevCellStateTensor, prevCellState_),
Result::NO_ERROR);
// Set all the outputs.
- ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kConcatTempTensor, &concatTemp_),
- Result::NO_ERROR);
- ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kActivationTempTensor,
- &activationTemp_),
- Result::NO_ERROR);
- ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kOutputStateOutTensor,
- &outputStateOut_),
- Result::NO_ERROR);
ASSERT_EQ(
setOutputTensor(&execution, QuantizedLSTMCell::kCellStateOutTensor, &cellStateOut_),
Result::NO_ERROR);
@@ -133,8 +110,8 @@
ASSERT_EQ(execution.compute(), Result::NO_ERROR);
// Put state outputs into inputs for the next step
- prevOutput_.swap(outputStateOut_);
- prevCellState_.swap(cellStateOut_);
+ prevOutput_ = output_;
+ prevCellState_ = cellStateOut_;
}
int inputSize() { return inputSize_; }
@@ -153,9 +130,6 @@
std::vector<int32_t> bias_;
std::vector<int16_t> prevCellState_;
// Outputs
- std::vector<uint8_t> concatTemp_;
- std::vector<int16_t> activationTemp_;
- std::vector<uint8_t> outputStateOut_;
std::vector<int16_t> cellStateOut_;
std::vector<uint8_t> output_;
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 9ab44c7..731721e 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -3009,28 +3009,12 @@
* 32768, zeroPoint = 0).
*
* Outputs:
- * * 0: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- * and shape [numBatches, inputSize + outputSize]. This tensor is a
- * scratch buffer used to store concatenation of the input and the
- * output from previous time step to pass it to the fully-connected
- * layer. Tensor is quantized with a fixed quantization range of
- * [-1, 127/128] (scale = 1/128, zeroPoint = 128).
- * * 1: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
- * and shape [numBatches, 4 * cellSize]. This tensor is a scratch
- * buffer used to store the result of the fully-connected layer.
- * * 2: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
- * and shape [numBathes, outputSize] which contains a copy of the
- * output from the current time step. NN API requires this tensor to
- * pass the output value through time. Tensor is quantized with a fixed
- * quantization range of [-1, 127/128] (scale = 1/128, zeroPoint =
- * 128).
- * * 3: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+ * * 0: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
* and shape [numBatches, cellSize] which contains a cell state from
- * the current time step. NN API requires this tensor to pass the cell
- * state value through time. Tensor is quantized using a quantization
+ * the current time step. Tensor is quantized using a quantization
* range of [-2^4, 2^4 * 32767/32768] (scale = 2^4 / 32768, zeroPoint =
* 0).
- * * 4: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ * * 1: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
* and shape [numBathes, outputSize] which contains the output value.
* Tensor is quantized with a fixed quantization range of [-1, 127/128]
* (scale = 1/128, zeroPoint = 128).
diff --git a/runtime/test/generated/examples/quantized_lstm.example.cpp b/runtime/test/generated/examples/quantized_lstm.example.cpp
index 0a2d3b8..126ae86 100644
--- a/runtime/test/generated/examples/quantized_lstm.example.cpp
+++ b/runtime/test/generated/examples/quantized_lstm.example.cpp
@@ -29,9 +29,9 @@
// int -> INT32 map
{},
// int -> QUANT8_ASYMM map
- {{0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, {2, {140, 151, 146, 112, 136, 156, 142, 112}}, {4, {140, 151, 146, 112, 136, 156, 142, 112}}},
+ {{1, {140, 151, 146, 112, 136, 156, 142, 112}}},
// int -> QUANT16_SYMM map
- {{1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, {3, {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235}}},
+ {{0, {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235}}},
// int -> FLOAT16 map
{},
// int -> BOOL8 map
diff --git a/runtime/test/generated/models/quantized_lstm.model.cpp b/runtime/test/generated/models/quantized_lstm.model.cpp
index 0e9e846..6aa73fd 100644
--- a/runtime/test/generated/models/quantized_lstm.model.cpp
+++ b/runtime/test/generated/models/quantized_lstm.model.cpp
@@ -6,30 +6,25 @@
OperandType type2(Type::TENSOR_QUANT8_ASYMM, {16, 6}, 0.00408021f, 100);
OperandType type3(Type::TENSOR_INT32, {16}, 3.1876640625e-05f, 0);
OperandType type4(Type::TENSOR_QUANT16_SYMM, {2, 4}, 0.00048828125f, 0);
- OperandType type5(Type::TENSOR_QUANT8_ASYMM, {2, 6}, 0.0078125f, 128);
- OperandType type6(Type::TENSOR_QUANT16_SYMM, {2, 16}, 0.0078125f, 0);
// Phase 1, operands
auto input = model->addOperand(&type0);
auto prevOutput = model->addOperand(&type1);
auto weights = model->addOperand(&type2);
auto bias = model->addOperand(&type3);
auto prevCellState = model->addOperand(&type4);
- auto concatTemp = model->addOperand(&type5);
- auto activationTemp = model->addOperand(&type6);
- auto outputStateOut = model->addOperand(&type1);
auto cellStateOut = model->addOperand(&type4);
auto output = model->addOperand(&type1);
// Phase 2, operations
- model->addOperation(ANEURALNETWORKS_QUANTIZED_16BIT_LSTM, {input, prevOutput, weights, bias, prevCellState}, {concatTemp, activationTemp, outputStateOut, cellStateOut, output});
+ model->addOperation(ANEURALNETWORKS_QUANTIZED_16BIT_LSTM, {input, prevOutput, weights, bias, prevCellState}, {cellStateOut, output});
// Phase 3, inputs and outputs
model->identifyInputsAndOutputs(
{input, prevOutput, weights, bias, prevCellState},
- {concatTemp, activationTemp, outputStateOut, cellStateOut, output});
+ {cellStateOut, output});
assert(model->isValid());
}
inline bool is_ignored(int i) {
- static std::set<int> ignore = {0, 1};
+ static std::set<int> ignore = {};
return ignore.find(i) != ignore.end();
}
diff --git a/runtime/test/generated/vts_models/quantized_lstm.model.cpp b/runtime/test/generated/vts_models/quantized_lstm.model.cpp
index a91645c..b26dd9e 100644
--- a/runtime/test/generated/vts_models/quantized_lstm.model.cpp
+++ b/runtime/test/generated/vts_models/quantized_lstm.model.cpp
@@ -49,33 +49,6 @@
.location = {.poolIndex = 0, .offset = 0, .length = 0},
},
{
- .type = OperandType::TENSOR_QUANT8_ASYMM,
- .dimensions = {2, 6},
- .numberOfConsumers = 0,
- .scale = 0.0078125f,
- .zeroPoint = 128,
- .lifetime = OperandLifeTime::MODEL_OUTPUT,
- .location = {.poolIndex = 0, .offset = 0, .length = 0},
- },
- {
- .type = OperandType::TENSOR_QUANT16_SYMM,
- .dimensions = {2, 16},
- .numberOfConsumers = 0,
- .scale = 0.0078125f,
- .zeroPoint = 0,
- .lifetime = OperandLifeTime::MODEL_OUTPUT,
- .location = {.poolIndex = 0, .offset = 0, .length = 0},
- },
- {
- .type = OperandType::TENSOR_QUANT8_ASYMM,
- .dimensions = {2, 4},
- .numberOfConsumers = 0,
- .scale = 0.0078125f,
- .zeroPoint = 128,
- .lifetime = OperandLifeTime::MODEL_OUTPUT,
- .location = {.poolIndex = 0, .offset = 0, .length = 0},
- },
- {
.type = OperandType::TENSOR_QUANT16_SYMM,
.dimensions = {2, 4},
.numberOfConsumers = 0,
@@ -99,12 +72,12 @@
{
.type = OperationType::QUANTIZED_16BIT_LSTM,
.inputs = {0, 1, 2, 3, 4},
- .outputs = {5, 6, 7, 8, 9},
+ .outputs = {5, 6},
}
};
const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 4};
- const std::vector<uint32_t> outputIndexes = {5, 6, 7, 8, 9};
+ const std::vector<uint32_t> outputIndexes = {5, 6};
std::vector<uint8_t> operandValues = {};
const std::vector<hidl_memory> pools = {};
@@ -119,7 +92,7 @@
}
inline bool is_ignored(int i) {
- static std::set<int> ignore = {0, 1};
+ static std::set<int> ignore = {};
return ignore.find(i) != ignore.end();
}
diff --git a/runtime/test/specs/V1_2/quantized_lstm.mod.py b/runtime/test/specs/V1_2/quantized_lstm.mod.py
index 09ec0a1..618c75e 100644
--- a/runtime/test/specs/V1_2/quantized_lstm.mod.py
+++ b/runtime/test/specs/V1_2/quantized_lstm.mod.py
@@ -29,9 +29,6 @@
bias = Input("bias", ("TENSOR_INT32", (4 * n_cell,), weights_scale / 128, 0))
prev_cell_state = Input("prevCellState", ("TENSOR_QUANT16_SYMM", (n_batch, n_cell), 1 / 2048, 0))
-concat_temp = IgnoredOutput("concatTemp", ("TENSOR_QUANT8_ASYMM", (n_batch, n_input + n_output), 1 / 128, 128))
-activation_temp = IgnoredOutput("activationTemp", ("TENSOR_QUANT16_SYMM", (n_batch, 4 * n_cell), 1 / 128, 0))
-output_state_out = Output("outputStateOut", ("TENSOR_QUANT8_ASYMM", (n_batch, n_output), 1 / 128, 128))
cell_state_out = Output("cellStateOut", ("TENSOR_QUANT16_SYMM", (n_batch, n_cell), 1 / 2048, 0))
output = Output("output", ("TENSOR_QUANT8_ASYMM", (n_batch, n_output), 1 / 128, 128))
@@ -42,7 +39,7 @@
weights,
bias,
prev_cell_state
-).To([concat_temp, activation_temp, output_state_out, cell_state_out, output])
+).To([cell_state_out, output])
input_dict = {
input_: [166, 179, 50, 150],
@@ -68,12 +65,8 @@
prev_cell_state: [876, 1034, 955, -909, 761, 1029, 796, -1036]
}
-golden_output = [140, 151, 146, 112, 136, 156, 142, 112]
output_dict = {
- concat_temp: [0] * (n_batch * (n_input + n_output)),
- activation_temp: [0] * (n_batch * 4 * n_cell),
- output_state_out: golden_output,
cell_state_out: [1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235],
- output: golden_output
+ output: [140, 151, 146, 112, 136, 156, 142, 112]
}
Example((input_dict, output_dict), model=model)