Remove scratch buffers and redundant output from quantized LSTM

* Remove scratch buffers
* Remove redundant state output
* Update tests

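Callers now bind only two outputs: the output tensor itself carries the
recurrent state, and the scratch buffers are allocated on the stack
inside QuantizedLSTMCell::eval() for the duration of the call. A
minimal before/after sketch of the model-building change (mirroring
the updated generated tests):

    // Before: five outputs, the first two being scratch buffers.
    model->addOperation(
            ANEURALNETWORKS_QUANTIZED_16BIT_LSTM,
            {input, prevOutput, weights, bias, prevCellState},
            {concatTemp, activationTemp, outputStateOut,
             cellStateOut, output});

    // After: two outputs. To step the cell through time, feed output
    // back as prevOutput and cellStateOut back as prevCellState.
    model->addOperation(
            ANEURALNETWORKS_QUANTIZED_16BIT_LSTM,
            {input, prevOutput, weights, bias, prevCellState},
            {cellStateOut, output});
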
Test: NeuralNetworksTest_operations
Test: NeuralNetworksTest_static
Bug: 113561892
Change-Id: I93cb7567b78c10e2c2c75c1b5ada80516b427706
Merged-In: I93cb7567b78c10e2c2c75c1b5ada80516b427706
(cherry picked from commit 42fc2f949317f2941eb0c626db0fe4f5d5817b32)
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index ea51177..e321475 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -2503,29 +2503,19 @@
                                outShape);
         } break;
         case OperationType::QUANTIZED_16BIT_LSTM: {
-            if (!allParametersPresent(5, 5)) {
+            if (!allParametersPresent(5, 2)) {
                 return ANEURALNETWORKS_BAD_DATA;
             }
 
-            RunTimeOperandInfo& concatTemp = mOperands[outs[QuantizedLSTMCell::kConcatTempTensor]];
-            RunTimeOperandInfo& activationTemp =
-                    mOperands[outs[QuantizedLSTMCell::kActivationTempTensor]];
-            RunTimeOperandInfo& outputStateOut =
-                    mOperands[outs[QuantizedLSTMCell::kOutputStateOutTensor]];
             RunTimeOperandInfo& cellStateOut =
                     mOperands[outs[QuantizedLSTMCell::kCellStateOutTensor]];
             RunTimeOperandInfo& output = mOperands[outs[QuantizedLSTMCell::kOutputTensor]];
 
-            Shape concatTempShape, activationTempShape, outputStateOutShape, cellStateOutShape,
-                    outputShape;
+            Shape cellStateOutShape, outputShape;
             QuantizedLSTMCell quantizedLSTMCell(operation, mOperands);
 
-            success = QuantizedLSTMCell::prepare(operation, mOperands, &concatTempShape,
-                                                 &activationTempShape, &outputStateOutShape,
-                                                 &cellStateOutShape, &outputShape) &&
-                      setInfoAndAllocateIfNeeded(&concatTemp, concatTempShape) &&
-                      setInfoAndAllocateIfNeeded(&activationTemp, activationTempShape) &&
-                      setInfoAndAllocateIfNeeded(&outputStateOut, outputStateOutShape) &&
+            success = QuantizedLSTMCell::prepare(operation, mOperands, &cellStateOutShape,
+                                                 &outputShape) &&
                       setInfoAndAllocateIfNeeded(&cellStateOut, cellStateOutShape) &&
                       setInfoAndAllocateIfNeeded(&output, outputShape) && quantizedLSTMCell.eval();
         } break;
diff --git a/common/Utils.cpp b/common/Utils.cpp
index a26ff1e..45c7bd5 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -1581,8 +1581,8 @@
                                                  outExpectedTypes);
         }
         case ANEURALNETWORKS_QUANTIZED_16BIT_LSTM: {
-            if (inputCount != 5 || outputCount != 5) {
-                logInvalidInOutNumber(5, 5);
+            if (inputCount != 5 || outputCount != 2) {
+                logInvalidInOutNumber(5, 2);
                 return ANEURALNETWORKS_BAD_DATA;
             }
             NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_2));
@@ -1590,10 +1590,8 @@
                     OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT8_ASYMM,
                     OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_INT32,
                     OperandType::TENSOR_QUANT16_SYMM};
-            std::vector<OperandType> outExpectedTypes = {
-                    OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_SYMM,
-                    OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_SYMM,
-                    OperandType::TENSOR_QUANT8_ASYMM};
+            std::vector<OperandType> outExpectedTypes = {OperandType::TENSOR_QUANT16_SYMM,
+                                                         OperandType::TENSOR_QUANT8_ASYMM};
             return validateOperationOperandTypes(operands, inputCount, inputIndexes,
                                                  inExpectedTypes, outputCount, outputIndexes,
                                                  outExpectedTypes);
diff --git a/common/operations/QuantizedLSTM.cpp b/common/operations/QuantizedLSTM.cpp
index 60ec091..bcbf91b 100644
--- a/common/operations/QuantizedLSTM.cpp
+++ b/common/operations/QuantizedLSTM.cpp
@@ -55,8 +55,8 @@
                        const Dims<4>& bias_dims, const int16_t* prevCellState_data_int16,
                        const Dims<4>& prevCellState_dims, int16_t* output_state_data_int16,
                        const Dims<4>& output_state_dims, uint8_t* output_activ_data_uint8,
-                       const Dims<4>& output_activ_dims, uint8_t* concatTemp_data_uint8,
-                       const Dims<4>& concatTemp_dims, int16_t* activ_temp_data_int16,
+                       const Dims<4>& output_activ_dims, uint8_t* concat_temp_data_uint8,
+                       const Dims<4>& concat_temp_dims, int16_t* activ_temp_data_int16,
                        const Dims<4>& activ_temp_dims, int32_t weights_zero_point,
                        int32_t accum_multiplier, int accum_shift) {
   // Gather dimensions information, and perform consistency checks.
@@ -90,7 +90,7 @@
   Dims<4> const* concat_input_arrays_dims[2] = {&input_dims, &prev_activ_dims};
   tflite::reference_ops::Concatenation<tflite::FusedActivationFunctionType::kNone, uint8_t>(
       0, concat_input_arrays_data, concat_input_arrays_dims, 2,
-      concatTemp_data_uint8, concatTemp_dims);
+      concat_temp_data_uint8, concat_temp_dims);
 
   // Implementation of the fully connected node inside the LSTM cell.
   // The operands are 8-bit integers, the accumulators are internally 32bit
@@ -104,7 +104,7 @@
       int32_t accum = bias_data_int32[out_c];
       // Accumulation loop.
       for (int d = 0; d < fc_accum_depth; ++d) {
-        int16_t input_val = concatTemp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16_t input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
         int16_t weights_val =
             weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
         accum += input_val * weights_val;
@@ -207,17 +207,13 @@
     bias_ = GetInput(operation, operands, kBiasTensor);
     prevCellState_ = GetInput(operation, operands, kPrevCellStateTensor);
 
-    concatTemp_ = GetOutput(operation, operands, kConcatTempTensor);
-    activationTemp_ = GetOutput(operation, operands, kActivationTempTensor);
-    outputStateOut_ = GetOutput(operation, operands, kOutputStateOutTensor);
     cellStateOut_ = GetOutput(operation, operands, kCellStateOutTensor);
     output_ = GetOutput(operation, operands, kOutputTensor);
 }
 
 bool QuantizedLSTMCell::prepare(const Operation& operation,
-                                std::vector<RunTimeOperandInfo>& operands, Shape* concatTempShape,
-                                Shape* activationTempShape, Shape* outputStateOutShape,
-                                Shape* cellStateOutShape, Shape* outputShape) {
+                                std::vector<RunTimeOperandInfo>& operands, Shape* cellStateOutShape,
+                                Shape* outputShape) {
     auto input = GetInput(operation, operands, kInputTensor);
     NN_CHECK_EQ(NumDimensions(input), 2);
     NN_CHECK_EQ(input->scale, 1. / 128.0);
@@ -264,15 +260,6 @@
     // We only support StateIntegerBits == 4
     NN_CHECK(stateIntegerBits == 4);
 
-    *concatTempShape = input->shape();
-    concatTempShape->dimensions[1] = totalDepth;
-
-    activationTempShape->type = OperandType::TENSOR_QUANT16_SYMM;
-    activationTempShape->dimensions = {numBatches, 4 * activationDepth};
-    activationTempShape->scale = prevOutput->scale;
-    activationTempShape->offset = prevOutput->zeroPoint;
-
-    *outputStateOutShape = prevOutput->shape();
     *cellStateOutShape = prevCellState->shape();
     *outputShape = prevOutput->shape();
     return true;
@@ -280,6 +267,18 @@
 
 bool QuantizedLSTMCell::eval() {
     NNTRACE_COMP("QuantizedLSTM::eval");
+
+    Shape concatTempShape;
+    concatTempShape.dimensions = {SizeOfDimension(input_, 0),
+                                  SizeOfDimension(input_, 1) + SizeOfDimension(prevOutput_, 1)};
+
+    Shape activationTempShape;
+    activationTempShape.dimensions = {SizeOfDimension(input_, 0),
+                                      4 * SizeOfDimension(prevCellState_, 1)};
+
+    std::vector<uint8_t> concatTemp(getNumberOfElements(concatTempShape));
+    std::vector<int16_t> activationTemp(getNumberOfElements(activationTempShape));
+
     // From https://arxiv.org/pdf/1712.05877, for a fully-connected layer,
     // accumulator multiplier is equal to:
     // (input scale) * (weights scale) / (fully-connected output scale)
@@ -301,15 +300,10 @@
             GetBuffer<const int16_t>(prevCellState_), convertShapeToDims(prevCellState_->shape()),
             // Outputs.
             GetBuffer<int16_t>(cellStateOut_), convertShapeToDims(cellStateOut_->shape()),
-            GetBuffer<uint8_t>(output_), convertShapeToDims(output_->shape()),
-            GetBuffer<uint8_t>(concatTemp_), convertShapeToDims(concatTemp_->shape()),
-            GetBuffer<int16_t>(activationTemp_), convertShapeToDims(activationTemp_->shape()),
-            weights_->zeroPoint, accumMultiplier, accumShift);
-    // Copy output_ to outputStateOut_ to use it in the next time step
-    const int flat_output_size = output_->shape().dimensions[0] * output_->shape().dimensions[1];
-    memcpy(outputStateOut_->buffer, output_->buffer, flat_output_size);
-    outputStateOut_->scale = output_->scale;
-    outputStateOut_->zeroPoint = output_->zeroPoint;
+            GetBuffer<uint8_t>(output_), convertShapeToDims(output_->shape()), concatTemp.data(),
+            convertShapeToDims(concatTempShape), activationTemp.data(),
+            convertShapeToDims(activationTempShape), weights_->zeroPoint, accumMultiplier,
+            accumShift);
     return true;
 }
 
diff --git a/common/operations/QuantizedLSTM.h b/common/operations/QuantizedLSTM.h
index 4e85ddf..6754b49 100644
--- a/common/operations/QuantizedLSTM.h
+++ b/common/operations/QuantizedLSTM.h
@@ -2,6 +2,7 @@
 #define FRAMEWORKS_ML_NN_QUANTIZEDLSTM_H
 
 #include "HalOperation.h"
+#include "OperationsUtils.h"
 
 #include <vector>
 
@@ -9,7 +10,6 @@
 namespace nn {
 
 struct RunTimeOperandInfo;
-struct Shape;
 
 class QuantizedLSTMCell {
    public:
@@ -17,8 +17,7 @@
                       std::vector<RunTimeOperandInfo>& operands);
 
     static bool prepare(const android::hardware::neuralnetworks::V1_2::Operation& operation,
-                        std::vector<RunTimeOperandInfo>& operands, Shape* concatTempShape,
-                        Shape* activationTempShape, Shape* outputStateShape, Shape* cellStateShape,
+                        std::vector<RunTimeOperandInfo>& operands, Shape* cellStateShape,
                         Shape* outputShape);
     bool eval();
 
@@ -29,11 +28,8 @@
     static constexpr int kBiasTensor = 3;
     static constexpr int kPrevCellStateTensor = 4;
     // Outputs:
-    static constexpr int kConcatTempTensor = 0;
-    static constexpr int kActivationTempTensor = 1;
-    static constexpr int kOutputStateOutTensor = 2;
-    static constexpr int kCellStateOutTensor = 3;
-    static constexpr int kOutputTensor = 4;
+    static constexpr int kCellStateOutTensor = 0;
+    static constexpr int kOutputTensor = 1;
 
    private:
     const RunTimeOperandInfo* input_;
@@ -42,9 +38,6 @@
     const RunTimeOperandInfo* bias_;
     const RunTimeOperandInfo* prevCellState_;
 
-    RunTimeOperandInfo* concatTemp_;
-    RunTimeOperandInfo* activationTemp_;
-    RunTimeOperandInfo* outputStateOut_;
     RunTimeOperandInfo* cellStateOut_;
     RunTimeOperandInfo* output_;
 };
diff --git a/common/operations/QuantizedLSTMTest.cpp b/common/operations/QuantizedLSTMTest.cpp
index 73e87cf..a25b00f 100644
--- a/common/operations/QuantizedLSTMTest.cpp
+++ b/common/operations/QuantizedLSTMTest.cpp
@@ -55,27 +55,15 @@
         // clang-format on
 
         std::vector<std::vector<uint32_t>> output_shapes = {
-                {numBatches, inputSize + outputSize},  // concatTemp
-                {numBatches, 4 * cellSize},            // activationTemp
-                {numBatches, outputSize},              // outputStateOut
                 {numBatches, cellSize},                // cellStateOut
                 {numBatches, outputSize}};             // output
 
         std::vector<uint32_t> outputs;
         // clang-format off
-        OperandType concatTempOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[0],
-                                          1. / 128., 128);
-        outputs.push_back(model_.addOperand(&concatTempOperandType));
-        OperandType activationTempOperandType(Type::TENSOR_QUANT16_SYMM, output_shapes[1],
-                                              1. / 128., 0);
-        outputs.push_back(model_.addOperand(&activationTempOperandType));
-        OperandType outputStateOutOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[2],
-                                              1. / 128., 128);
-        outputs.push_back(model_.addOperand(&outputStateOutOperandType));
-        OperandType cellStateOutOperandType(Type::TENSOR_QUANT16_SYMM, output_shapes[3],
+        OperandType cellStateOutOperandType(Type::TENSOR_QUANT16_SYMM, output_shapes[0],
                                             1. / 2048., 0);
         outputs.push_back(model_.addOperand(&cellStateOutOperandType));
-        OperandType outputOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[4],
+        OperandType outputOperandType(Type::TENSOR_QUANT8_ASYMM, output_shapes[1],
                                       1. / 128., 128);
         outputs.push_back(model_.addOperand(&outputOperandType));
         // clang-format on
@@ -87,11 +75,8 @@
         prevOutput_.insert(prevOutput_.end(), numBatches * outputSize, 128);
         prevCellState_.insert(prevCellState_.end(), numBatches * cellSize, 0);
 
-        reserveOutputTensor(&concatTemp_, output_shapes[0]);
-        reserveOutputTensor(&activationTemp_, output_shapes[1]);
-        reserveOutputTensor(&outputStateOut_, output_shapes[2]);
-        reserveOutputTensor(&cellStateOut_, output_shapes[3]);
-        reserveOutputTensor(&output_, output_shapes[4]);
+        reserveOutputTensor(&cellStateOut_, output_shapes[0]);
+        reserveOutputTensor(&output_, output_shapes[1]);
 
         model_.finish();
     }
@@ -116,14 +101,6 @@
                 setInputTensor(&execution, QuantizedLSTMCell::kPrevCellStateTensor, prevCellState_),
                 Result::NO_ERROR);
         // Set all the outputs.
-        ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kConcatTempTensor, &concatTemp_),
-                  Result::NO_ERROR);
-        ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kActivationTempTensor,
-                                  &activationTemp_),
-                  Result::NO_ERROR);
-        ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kOutputStateOutTensor,
-                                  &outputStateOut_),
-                  Result::NO_ERROR);
         ASSERT_EQ(
                 setOutputTensor(&execution, QuantizedLSTMCell::kCellStateOutTensor, &cellStateOut_),
                 Result::NO_ERROR);
@@ -133,8 +110,8 @@
         ASSERT_EQ(execution.compute(), Result::NO_ERROR);
 
         // Put state outputs into inputs for the next step
-        prevOutput_.swap(outputStateOut_);
-        prevCellState_.swap(cellStateOut_);
+        prevOutput_ = output_;
+        prevCellState_ = cellStateOut_;
     }
 
     int inputSize() { return inputSize_; }
@@ -153,9 +130,6 @@
     std::vector<int32_t> bias_;
     std::vector<int16_t> prevCellState_;
     // Outputs
-    std::vector<uint8_t> concatTemp_;
-    std::vector<int16_t> activationTemp_;
-    std::vector<uint8_t> outputStateOut_;
     std::vector<int16_t> cellStateOut_;
     std::vector<uint8_t> output_;
 
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 9ab44c7..731721e 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -3009,28 +3009,12 @@
      *      32768, zeroPoint = 0).
      *
      * Outputs:
-     * * 0: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
-     *      and shape [numBatches, inputSize + outputSize]. This tensor is a
-     *      scratch buffer used to store concatenation of the input and the
-     *      output from previous time step to pass it to the fully-connected
-     *      layer. Tensor is quantized with a fixed quantization range of
-     *      [-1, 127/128] (scale = 1/128, zeroPoint = 128).
-     * * 1: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
-     *      and shape [numBatches, 4 * cellSize]. This tensor is a scratch
-     *      buffer used to store the result of the fully-connected layer.
-     * * 2: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
-     *      and shape [numBathes, outputSize] which contains a copy of the
-     *      output from the current time step. NN API requires this tensor to
-     *      pass the output value through time. Tensor is quantized with a fixed
-     *      quantization range of [-1, 127/128] (scale = 1/128, zeroPoint =
-     *      128).
-     * * 3: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
+     * * 0: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT16_SYMM}
      *      and shape [numBatches, cellSize] which contains a cell state from
-     *      the current time step. NN API requires this tensor to pass the cell
-     *      state value through time. Tensor is quantized using a quantization
+     *      the current time step. Tensor is quantized using a quantization
      *      range of [-2^4, 2^4 * 32767/32768] (scale = 2^4 / 32768, zeroPoint =
      *      0).
-     * * 4: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+     * * 1: A 2-D tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
     *      and shape [numBatches, outputSize] which contains the output value.
      *      Tensor is quantized with a fixed quantization range of [-1, 127/128]
      *      (scale = 1/128, zeroPoint = 128).
diff --git a/runtime/test/generated/examples/quantized_lstm.example.cpp b/runtime/test/generated/examples/quantized_lstm.example.cpp
index 0a2d3b8..126ae86 100644
--- a/runtime/test/generated/examples/quantized_lstm.example.cpp
+++ b/runtime/test/generated/examples/quantized_lstm.example.cpp
@@ -29,9 +29,9 @@
   // int -> INT32 map
   {},
   // int -> QUANT8_ASYMM map
-  {{0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, {2, {140, 151, 146, 112, 136, 156, 142, 112}}, {4, {140, 151, 146, 112, 136, 156, 142, 112}}},
+  {{1, {140, 151, 146, 112, 136, 156, 142, 112}}},
   // int -> QUANT16_SYMM map
-  {{1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, {3, {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235}}},
+  {{0, {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235}}},
   // int -> FLOAT16 map
   {},
   // int -> BOOL8 map
diff --git a/runtime/test/generated/models/quantized_lstm.model.cpp b/runtime/test/generated/models/quantized_lstm.model.cpp
index 0e9e846..6aa73fd 100644
--- a/runtime/test/generated/models/quantized_lstm.model.cpp
+++ b/runtime/test/generated/models/quantized_lstm.model.cpp
@@ -6,30 +6,25 @@
   OperandType type2(Type::TENSOR_QUANT8_ASYMM, {16, 6}, 0.00408021f, 100);
   OperandType type3(Type::TENSOR_INT32, {16}, 3.1876640625e-05f, 0);
   OperandType type4(Type::TENSOR_QUANT16_SYMM, {2, 4}, 0.00048828125f, 0);
-  OperandType type5(Type::TENSOR_QUANT8_ASYMM, {2, 6}, 0.0078125f, 128);
-  OperandType type6(Type::TENSOR_QUANT16_SYMM, {2, 16}, 0.0078125f, 0);
   // Phase 1, operands
   auto input = model->addOperand(&type0);
   auto prevOutput = model->addOperand(&type1);
   auto weights = model->addOperand(&type2);
   auto bias = model->addOperand(&type3);
   auto prevCellState = model->addOperand(&type4);
-  auto concatTemp = model->addOperand(&type5);
-  auto activationTemp = model->addOperand(&type6);
-  auto outputStateOut = model->addOperand(&type1);
   auto cellStateOut = model->addOperand(&type4);
   auto output = model->addOperand(&type1);
   // Phase 2, operations
-  model->addOperation(ANEURALNETWORKS_QUANTIZED_16BIT_LSTM, {input, prevOutput, weights, bias, prevCellState}, {concatTemp, activationTemp, outputStateOut, cellStateOut, output});
+  model->addOperation(ANEURALNETWORKS_QUANTIZED_16BIT_LSTM, {input, prevOutput, weights, bias, prevCellState}, {cellStateOut, output});
   // Phase 3, inputs and outputs
   model->identifyInputsAndOutputs(
     {input, prevOutput, weights, bias, prevCellState},
-    {concatTemp, activationTemp, outputStateOut, cellStateOut, output});
+    {cellStateOut, output});
   assert(model->isValid());
 }
 
 inline bool is_ignored(int i) {
-  static std::set<int> ignore = {0, 1};
+  static std::set<int> ignore = {};
   return ignore.find(i) != ignore.end();
 }
 
diff --git a/runtime/test/generated/vts_models/quantized_lstm.model.cpp b/runtime/test/generated/vts_models/quantized_lstm.model.cpp
index a91645c..b26dd9e 100644
--- a/runtime/test/generated/vts_models/quantized_lstm.model.cpp
+++ b/runtime/test/generated/vts_models/quantized_lstm.model.cpp
@@ -49,33 +49,6 @@
             .location = {.poolIndex = 0, .offset = 0, .length = 0},
         },
         {
-            .type = OperandType::TENSOR_QUANT8_ASYMM,
-            .dimensions = {2, 6},
-            .numberOfConsumers = 0,
-            .scale = 0.0078125f,
-            .zeroPoint = 128,
-            .lifetime = OperandLifeTime::MODEL_OUTPUT,
-            .location = {.poolIndex = 0, .offset = 0, .length = 0},
-        },
-        {
-            .type = OperandType::TENSOR_QUANT16_SYMM,
-            .dimensions = {2, 16},
-            .numberOfConsumers = 0,
-            .scale = 0.0078125f,
-            .zeroPoint = 0,
-            .lifetime = OperandLifeTime::MODEL_OUTPUT,
-            .location = {.poolIndex = 0, .offset = 0, .length = 0},
-        },
-        {
-            .type = OperandType::TENSOR_QUANT8_ASYMM,
-            .dimensions = {2, 4},
-            .numberOfConsumers = 0,
-            .scale = 0.0078125f,
-            .zeroPoint = 128,
-            .lifetime = OperandLifeTime::MODEL_OUTPUT,
-            .location = {.poolIndex = 0, .offset = 0, .length = 0},
-        },
-        {
             .type = OperandType::TENSOR_QUANT16_SYMM,
             .dimensions = {2, 4},
             .numberOfConsumers = 0,
@@ -99,12 +72,12 @@
         {
             .type = OperationType::QUANTIZED_16BIT_LSTM,
             .inputs = {0, 1, 2, 3, 4},
-            .outputs = {5, 6, 7, 8, 9},
+            .outputs = {5, 6},
         }
     };
 
     const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 4};
-    const std::vector<uint32_t> outputIndexes = {5, 6, 7, 8, 9};
+    const std::vector<uint32_t> outputIndexes = {5, 6};
     std::vector<uint8_t> operandValues = {};
     const std::vector<hidl_memory> pools = {};
 
@@ -119,7 +92,7 @@
 }
 
 inline bool is_ignored(int i) {
-  static std::set<int> ignore = {0, 1};
+  static std::set<int> ignore = {};
   return ignore.find(i) != ignore.end();
 }
 
diff --git a/runtime/test/specs/V1_2/quantized_lstm.mod.py b/runtime/test/specs/V1_2/quantized_lstm.mod.py
index 09ec0a1..618c75e 100644
--- a/runtime/test/specs/V1_2/quantized_lstm.mod.py
+++ b/runtime/test/specs/V1_2/quantized_lstm.mod.py
@@ -29,9 +29,6 @@
 bias = Input("bias", ("TENSOR_INT32", (4 * n_cell,), weights_scale / 128, 0))
 prev_cell_state = Input("prevCellState", ("TENSOR_QUANT16_SYMM", (n_batch, n_cell), 1 / 2048, 0))
 
-concat_temp = IgnoredOutput("concatTemp", ("TENSOR_QUANT8_ASYMM", (n_batch, n_input + n_output), 1 / 128, 128))
-activation_temp = IgnoredOutput("activationTemp", ("TENSOR_QUANT16_SYMM", (n_batch, 4 * n_cell), 1 / 128, 0))
-output_state_out = Output("outputStateOut", ("TENSOR_QUANT8_ASYMM", (n_batch, n_output), 1 / 128, 128))
 cell_state_out = Output("cellStateOut", ("TENSOR_QUANT16_SYMM", (n_batch, n_cell), 1 / 2048, 0))
 output = Output("output", ("TENSOR_QUANT8_ASYMM", (n_batch, n_output), 1 / 128, 128))
 
@@ -42,7 +39,7 @@
                         weights,
                         bias,
                         prev_cell_state
-).To([concat_temp, activation_temp, output_state_out, cell_state_out, output])
+).To([cell_state_out, output])
 
 input_dict = {
     input_: [166, 179, 50,  150],
@@ -68,12 +65,8 @@
     prev_cell_state: [876, 1034, 955, -909, 761, 1029, 796, -1036]
 }
 
-golden_output = [140, 151, 146, 112, 136, 156, 142, 112]
 output_dict = {
-    concat_temp: [0] * (n_batch * (n_input + n_output)),
-    activation_temp: [0] * (n_batch * 4 * n_cell),
-    output_state_out: golden_output,
     cell_state_out: [1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235],
-    output: golden_output
+    output: [140, 151, 146, 112, 136, 156, 142, 112]
 }
 Example((input_dict, output_dict), model=model)