Adjust the Hexagon NN library's quantized output range

The Hexagon NN library chooses its own quantized output ranges. To
restore the quantized range provided by the NN API, a dequantize to
float followed by a quantize back to the NN API's range is appended to
the end of the Hexagon NN graph for each quantized output.
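
For reference, the float min/max fed to the appended quantize node are
derived with the usual asymmetric dequantization formula, as done by
createQuantizationValue(). A minimal standalone sketch (the struct and
helper names here are illustrative, not the driver's API):

    #include <cstdint>

    // real = (quantized - zeroPoint) * scale; a TENSOR_QUANT8_ASYMM
    // operand spans quantized values 0..255.
    struct QuantParams {
        float scale;
        int32_t zeroPoint;
    };

    float dequantize(int32_t quantized, const QuantParams& params) {
        return (quantized - params.zeroPoint) * params.scale;
    }

    // Example: scale = 0.5, zeroPoint = 128 gives the real range
    // [-64.0, 63.5], i.e. dequantize(0, p) and dequantize(255, p).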

Bug: 67478959
Test: mm
Test: runs on cts and vts binaries
Change-Id: I9cabf4c7c92335aaa0f04bd5e8d2133a6a53f001
diff --git a/1.0/HexagonModel.cpp b/1.0/HexagonModel.cpp
index e165057..136818d 100644
--- a/1.0/HexagonModel.cpp
+++ b/1.0/HexagonModel.cpp
@@ -191,7 +191,7 @@
     return hexagon::getPadding(padding);
 }
 
-hexagon_nn_input Model::createQuantizationValue(uint32_t operand, uint32_t quant_value) {
+hexagon_nn_input Model::createQuantizationValue(uint32_t operand, int32_t quant_value) {
     OperandInfo& operandInfo = mOperands[operand];
     float real_value = (quant_value - operandInfo.zeroPoint) * operandInfo.scale;
     return createValues<float>({real_value});
@@ -526,12 +526,30 @@
 
 bool Model::addOutputs() {
     // prepare OP_OUTPUT's inputs
-    std::vector<hexagon_nn_input> ins(mOutputs.size());
-    for (size_t i = 0; i < mOutputs.size(); ++i) {
-        OperandInfo& operand = mOperands[mOutputs[i]];
+    std::vector<hexagon_nn_input> ins;
+    for (size_t out : mOutputs) {
+        OperandInfo& operand = mOperands[out];
         HEXAGON_SOFT_ASSERT_NE(operand.hexagon_input, hexagon_nn_input{},
                                "output operand has not been registered");
-        ins[i] = operand.hexagon_input;
+
+        if (operand.type == OperandType::TENSOR_QUANT8_ASYMM) {
+            // Adjust quantized range of outputs
+            uint32_t dequant = addOperationInternal(
+                OP_Dequantize, NN_PAD_NA,
+                {operand.hexagon_input, operand.hexagon_input_min, operand.hexagon_input_max},
+                {make_hexagon_nn_output(operand.dimensions, sizeof(float))});
+            uint32_t quant =
+                addOperationInternal(OP_Quantize, NN_PAD_NA,
+                                     {{.src_id = dequant, .output_idx = 0},
+                                      createQuantizationValue(out, 0),
+                                      createQuantizationValue(out, 255)},
+                                     {make_hexagon_nn_output(operand.dimensions, sizeof(uint8_t)),
+                                      make_hexagon_nn_output({1, 1, 1, 1}, sizeof(float)),
+                                      make_hexagon_nn_output({1, 1, 1, 1}, sizeof(float))});
+            ins.push_back({.src_id = quant, .output_idx = 0});
+        } else {
+            ins.push_back(operand.hexagon_input);
+        }
     }
 
     // add single output node for entire graph
diff --git a/1.0/HexagonModel.h b/1.0/HexagonModel.h
index 4661ad9..797620b 100644
--- a/1.0/HexagonModel.h
+++ b/1.0/HexagonModel.h
@@ -90,7 +90,7 @@
     const hexagon_nn_input& getTensor(uint32_t operand);
     const hexagon_nn_input& getQuantizationMin(uint32_t operand);
     const hexagon_nn_input& getQuantizationMax(uint32_t operand);
-    hexagon_nn_input createQuantizationValue(uint32_t operand, uint32_t quant_value);
+    hexagon_nn_input createQuantizationValue(uint32_t operand, int32_t quant_value);
     hexagon_nn_input createConvFilterTensor(uint32_t operand);
     hexagon_nn_input createDepthwiseFilterTensor(uint32_t operand, int32_t depth_multiplier);
     hexagon_nn_input createFullyConnectedWeightTensor(uint32_t operand);