Compute RGG golden results from FP model.

Before this CL, the golden results in RGG tests are computed on
nnapi-reference with the actual data type of the test model. This CL
changes the logic to compute the golden results from an equivalent
float32 model when possible, to avoid quantization bias.

Currently, only the golden results of single-operation models are
computed from fp32 models, because there are no multi-operation quant
models in RGG tests yet.
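
As a worked example of the quantization round trip (illustrative
values, not taken from any actual test): for a TENSOR_QUANT8_ASYMM
operand with scale = 0.5 and zeroPoint = 128, a quantized input value
of 130 dequantizes to (130 - 128) * 0.5 = 1.0 in the float32 golden
model, and a float32 result of 1.3 requantizes to
clamp(round(1.3 / 0.5) + 128, 0, 255) = 131 as the golden output.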

Fixes: 150805665
Test: NNT_static
Change-Id: I4e268d3aadeebdcdae1d7010a4d0564f45b11b82
Merged-In: I4e268d3aadeebdcdae1d7010a4d0564f45b11b82
(cherry picked from commit 0e75e10d3b63837a44fa16484d318b7174dc18f9)
diff --git a/runtime/test/fuzzing/TestRandomGraph.cpp b/runtime/test/fuzzing/TestRandomGraph.cpp
index 7aece22..99d2aff 100644
--- a/runtime/test/fuzzing/TestRandomGraph.cpp
+++ b/runtime/test/fuzzing/TestRandomGraph.cpp
@@ -223,13 +223,54 @@
         return false;
     }
 
+    // Compute the golden output results of the test model on nnapi-reference. If possible, the
+    // golden results are computed from an equivalent float32 model to avoid bias from the
+    // quantized CPU implementation.
+    void computeGoldenResults() {
+        SCOPED_TRACE("computeGoldenResults");
+
+        // Convert the test model to an equivalent float32 model if possible.
+        auto fpModel = convertToFloat32Model(mTestModel);
+        const TestModel& goldenModel = fpModel.has_value() ? fpModel.value() : mTestModel;
+
+        // Create model.
+        generated_tests::GeneratedModel model;
+        generated_tests::createModel(goldenModel, &model);
+        ASSERT_TRUE(model.isValid());
+        ASSERT_EQ(model.finish(), Result::NO_ERROR);
+
+        // Create compilation for nnapi-reference.
+        ASSERT_TRUE(mDevices.find(kRefDeviceName) != mDevices.end());
+        const auto refDevice = mDevices[kRefDeviceName];
+        test_wrapper::Compilation compilation;
+        ASSERT_EQ(compilation.createForDevice(&model, refDevice), Result::NO_ERROR);
+        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
+
+        // Create request.
+        test_wrapper::Execution execution(&compilation);
+        std::vector<TestBuffer> outputs;
+        generated_tests::createRequest(goldenModel, &execution, &outputs);
+
+        // Compute result.
+        ASSERT_EQ(execution.compute(), Result::NO_ERROR);
+
+        if (fpModel.has_value()) {
+            // Quantize the execution results as golden values.
+            setExpectedOutputsFromFloat32Results(outputs, &mTestModel);
+        } else {
+            for (uint32_t i = 0; i < outputs.size(); i++) {
+                auto outputIndex = mTestModel.main.outputIndexes[i];
+                mTestModel.main.operands[outputIndex].data = outputs[i];
+            }
+        }
+    }
+
     // Compile and execute the generated graph on a device selected by name.
     void computeAndVerifyResultsForDevice(const test_wrapper::Model* model, uint32_t numOps,
                                           const std::string& name) {
         SCOPED_TRACE("Device: " + name);
         ASSERT_TRUE(mDevices.find(name) != mDevices.end());
         const auto device = mDevices[name];
-        bool isRef = name.compare(kRefDeviceName) == 0;
 
         // Check if the device fully supports the graph.
         constexpr int kMaxNumberOperations = 1000;
@@ -239,8 +280,6 @@
                                                                         1, supported),
                   ANEURALNETWORKS_NO_ERROR);
         if (!std::all_of(supported, supported + numOps, [](bool v) { return v; })) {
-            // The reference device should always support all operations.
-            ASSERT_FALSE(isRef);
             std::cout << "[          ]   SKIP: " << name << " does not support the graph.\n";
             return;
         }
@@ -251,7 +290,7 @@
         int64_t featureLevel;
         ASSERT_EQ(ANeuralNetworksDevice_getFeatureLevel(device, &featureLevel),
                   ANEURALNETWORKS_NO_ERROR);
-        if (!isRef && shouldSkipTest(featureLevel)) return;
+        if (shouldSkipTest(featureLevel)) return;
 
         // Create compilation for device.
         test_wrapper::Compilation compilation;
@@ -261,7 +300,6 @@
         // is supported, but model is too big (too many operations and/or too-large constants) for
         // device.
         if (compileReturn == Result::OP_FAILED) {
-            ASSERT_FALSE(isRef);
             std::cout << "[          ]   SKIP: " << name << " failed at compilation step.\n";
             return;
         }
@@ -278,24 +316,16 @@
         // still fail, e.g. there may be operand shapes that are unknown until execution time, and
         // at execution time turn out to be too big.
         if (executeReturn == Result::OP_FAILED) {
-            ASSERT_FALSE(isRef);
             std::cout << "[          ]   SKIP: " << name << " failed at execution step.\n";
             return;
         }
         ASSERT_EQ(executeReturn, Result::NO_ERROR);
 
-        // Record the execution results as golden values.
-        if (isRef) {
-            for (uint32_t i = 0; i < outputs.size(); i++) {
-                auto outputIndex = mTestModel.main.outputIndexes[i];
-                mTestModel.main.operands[outputIndex].data = outputs[i];
-            }
-        }
-
-        if (featureLevel >= __ANDROID_API_Q__ && !isRef) {
+        if (featureLevel >= __ANDROID_API_Q__) {
             checkResults(mTestModel, outputs, mCriteria);
         }
 
+        // Dump test results on failure for debugging.
         if (::testing::Test::HasFailure() || mDumpSpec) {
             dumpTestResults(name, outputs);
         }
@@ -303,10 +333,12 @@
 
     // Compile and execute the generated graph normally (i.e., allow runtime to
     // distribute across devices).
-    void computeAndVerifyResults(const test_wrapper::Model* model, bool shouldCheckResults) {
+    void computeAndVerifyResults(const std::string& name, const test_wrapper::Model* model,
+                                 bool shouldCheckResults) {
         // Because we're not using the introspection/control API, the CpuDevice
         // is available as a fallback, and hence we assume that compilation and
         // execution will succeed.
+        SCOPED_TRACE(name);
 
         // Create compilation.
         test_wrapper::Compilation compilation(model);
@@ -322,6 +354,11 @@
         if (shouldCheckResults) {
             checkResults(mTestModel, outputs, mCriteria);
         }
+
+        // Dump test results on failure for debugging.
+        if (::testing::Test::HasFailure() || mDumpSpec) {
+            dumpTestResults(name, outputs);
+        }
     }
 
     // Main test entrance.
@@ -338,22 +375,19 @@
         ASSERT_TRUE(model.isValid());
         ASSERT_EQ(model.finish(), Result::NO_ERROR);
 
-        // Compute reference result.
-        computeAndVerifyResultsForDevice(&model, numOperations, kRefDeviceName);
+        // Compute reference results.
+        computeGoldenResults();
 
         // Compute on each available device.
         for (auto& pair : mDevices) {
-            // Skip the nnapi reference device.
-            if (pair.first.compare(kRefDeviceName) == 0) continue;
             computeAndVerifyResultsForDevice(&model, numOperations, pair.first);
         }
 
         if (numOperations > 1) {
             if (!shouldSkipTest(mStandardDevicesFeatureLevel)) {
-                // Compute normally (i.e., allow runtime to distribute across
-                // devices).
-                SCOPED_TRACE("Compute normally");
-                computeAndVerifyResults(&model, mStandardDevicesFeatureLevel >= __ANDROID_API_Q__);
+                // Compute normally (i.e., allow runtime to distribute across devices).
+                computeAndVerifyResults("Compute normally", &model,
+                                        mStandardDevicesFeatureLevel >= __ANDROID_API_Q__);
             }
 
 #ifndef NNTEST_CTS
@@ -364,9 +398,8 @@
                 // though some are of feature level < __ANDROID_API_Q__: In this
                 // case, we don't take feature level as an indication of
                 // reliability, as we do with real devices.
-                SCOPED_TRACE("Compute across synthetic devices");
                 DeviceManager::get()->forTest_setDevices(mSyntheticDevices);
-                computeAndVerifyResults(&model, true);
+                computeAndVerifyResults("Compute across synthetic devices", &model, true);
                 DeviceManager::get()->forTest_setDevices(mStandardDevices);
             }
 #endif
@@ -778,13 +811,8 @@
 TEST_RANDOM_GRAPH_WITH_DATA_TYPE_AND_RANK(TENSOR_BOOL8, 2);
 TEST_RANDOM_GRAPH_WITH_DATA_TYPE_AND_RANK(TENSOR_BOOL8, 1);
 
-#ifdef NNTEST_CTS
 INSTANTIATE_TEST_CASE_P(TestRandomGraph, SingleOperationTest, ::testing::Range(0u, 50u));
 INSTANTIATE_TEST_CASE_P(TestRandomGraph, RandomGraphTest, ::testing::Range(0u, 50u));
-#else
-INSTANTIATE_TEST_CASE_P(TestRandomGraph, SingleOperationTest, ::testing::Range(0u, 100u));
-INSTANTIATE_TEST_CASE_P(TestRandomGraph, RandomGraphTest, ::testing::Range(0u, 100u));
-#endif
 
 }  // namespace fuzzing_test
 }  // namespace nn
diff --git a/tools/test_generator/test_harness/TestHarness.cpp b/tools/test_generator/test_harness/TestHarness.cpp
index c535d4b..f373327 100644
--- a/tools/test_generator/test_harness/TestHarness.cpp
+++ b/tools/test_generator/test_harness/TestHarness.cpp
@@ -452,6 +452,27 @@
         "TENSOR_QUANT8_ASYMM_SIGNED",
 };
 
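+// Returns whether the given TestOperandType is a scalar (non-tensor) type. The table below is
+// indexed by the numeric enum value, so it must stay in sync with the declaration order of
+// TestOperandType.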
+bool isScalarType(TestOperandType type) {
+    static const std::vector<bool> kIsScalarOperandType = {
+            true,   // TestOperandType::FLOAT32
+            true,   // TestOperandType::INT32
+            true,   // TestOperandType::UINT32
+            false,  // TestOperandType::TENSOR_FLOAT32
+            false,  // TestOperandType::TENSOR_INT32
+            false,  // TestOperandType::TENSOR_QUANT8_ASYMM
+            true,   // TestOperandType::BOOL
+            false,  // TestOperandType::TENSOR_QUANT16_SYMM
+            false,  // TestOperandType::TENSOR_FLOAT16
+            false,  // TestOperandType::TENSOR_BOOL8
+            true,   // TestOperandType::FLOAT16
+            false,  // TestOperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL
+            false,  // TestOperandType::TENSOR_QUANT16_ASYMM
+            false,  // TestOperandType::TENSOR_QUANT8_SYMM
+            false,  // TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED
+    };
+    return kIsScalarOperandType[static_cast<int>(type)];
+}
+
 std::string getOperandClassInSpecFile(TestOperandLifeTime lifetime) {
     switch (lifetime) {
         case TestOperandLifeTime::SUBGRAPH_INPUT:
@@ -613,4 +634,164 @@
     mOs << "}\n";
 }
 
+template <typename T>
+static TestOperand convertOperandToFloat32(const TestOperand& op) {
+    TestOperand converted = op;
+    converted.type =
+            isScalarType(op.type) ? TestOperandType::FLOAT32 : TestOperandType::TENSOR_FLOAT32;
+    converted.scale = 0.0f;
+    converted.zeroPoint = 0;
+
+    const uint32_t numberOfElements = getNumberOfElements(converted);
+    converted.data = TestBuffer(numberOfElements * sizeof(float));
+    const T* data = op.data.get<T>();
+    float* floatData = converted.data.getMutable<float>();
+
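+    // A quantized operand is dequantized with the affine mapping
+    // real = (quantized - zeroPoint) * scale; an operand without a scale is converted with a
+    // plain static_cast.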
+    if (op.scale != 0.0f) {
+        std::transform(data, data + numberOfElements, floatData, [&op](T val) {
+            return (static_cast<float>(val) - op.zeroPoint) * op.scale;
+        });
+    } else {
+        std::transform(data, data + numberOfElements, floatData,
+                       [](T val) { return static_cast<float>(val); });
+    }
+    return converted;
+}
+
+std::optional<TestModel> convertToFloat32Model(const TestModel& testModel) {
+    // Only single-operation graphs are supported.
+    if (testModel.referenced.size() > 0 || testModel.main.operations.size() > 1) {
+        return std::nullopt;
+    }
+
+    // Check for unsupported operations.
+    CHECK(!testModel.main.operations.empty());
+    const auto& operation = testModel.main.operations[0];
+    // Do not convert type-casting operations.
+    if (operation.type == TestOperationType::DEQUANTIZE ||
+        operation.type == TestOperationType::QUANTIZE ||
+        operation.type == TestOperationType::CAST) {
+        return std::nullopt;
+    }
+    // HASHTABLE_LOOKUP behaves differently for float and quantized data types: on a key miss,
+    // the float version outputs a logical zero, while the quantized version outputs a byte zero.
+    if (operation.type == TestOperationType::HASHTABLE_LOOKUP) {
+        return std::nullopt;
+    }
+
+    auto convert = [&testModel, &operation](const TestOperand& op, uint32_t index) {
+        switch (op.type) {
+            case TestOperandType::TENSOR_FLOAT32:
+            case TestOperandType::FLOAT32:
+            case TestOperandType::TENSOR_BOOL8:
+            case TestOperandType::BOOL:
+            case TestOperandType::UINT32:
+                return op;
+            case TestOperandType::INT32:
+                // The third input of PAD_V2 uses INT32 to specify the padded value.
+                if (operation.type == TestOperationType::PAD_V2 && index == operation.inputs[2]) {
+                    // The scale and zero point are inherited from the first input.
+                    const uint32_t input0Index = operation.inputs[0];
+                    const auto& input0 = testModel.main.operands[input0Index];
+                    TestOperand scalarWithScaleAndZeroPoint = op;
+                    scalarWithScaleAndZeroPoint.scale = input0.scale;
+                    scalarWithScaleAndZeroPoint.zeroPoint = input0.zeroPoint;
+                    return convertOperandToFloat32<int32_t>(scalarWithScaleAndZeroPoint);
+                }
+                return op;
+            case TestOperandType::TENSOR_INT32:
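+                // A TENSOR_INT32 operand may carry a scale (e.g. the bias of a quantized
+                // operation); only quantized tensors need conversion.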
+                if (op.scale != 0.0f || op.zeroPoint != 0) {
+                    return convertOperandToFloat32<int32_t>(op);
+                }
+                return op;
+            case TestOperandType::TENSOR_FLOAT16:
+            case TestOperandType::FLOAT16:
+                return convertOperandToFloat32<_Float16>(op);
+            case TestOperandType::TENSOR_QUANT8_ASYMM:
+                return convertOperandToFloat32<uint8_t>(op);
+            case TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+                return convertOperandToFloat32<int8_t>(op);
+            case TestOperandType::TENSOR_QUANT16_ASYMM:
+                return convertOperandToFloat32<uint16_t>(op);
+            case TestOperandType::TENSOR_QUANT16_SYMM:
+                return convertOperandToFloat32<int16_t>(op);
+            default:
+                CHECK(false) << "OperandType not supported";
+                return TestOperand{};
+        }
+    };
+
+    TestModel converted = testModel;
+    for (uint32_t i = 0; i < testModel.main.operands.size(); i++) {
+        converted.main.operands[i] = convert(testModel.main.operands[i], i);
+    }
+    return converted;
+}
+
+template <typename T>
+static void setDataFromFloat32Buffer(const TestBuffer& fpBuffer, TestOperand* op) {
+    const uint32_t numberOfElements = getNumberOfElements(*op);
+    const float* floatData = fpBuffer.get<float>();
+    T* data = op->data.getMutable<T>();
+
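+    // A quantized operand is requantized with
+    // quantized = clamp(round(real / scale) + zeroPoint, min, max); an operand without a scale
+    // is converted with a plain static_cast.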
+    if (op->scale != 0.0f) {
+        std::transform(floatData, floatData + numberOfElements, data, [op](float val) {
+            int32_t unclamped = std::round(val / op->scale) + op->zeroPoint;
+            int32_t clamped = std::clamp<int32_t>(unclamped, std::numeric_limits<T>::min(),
+                                                  std::numeric_limits<T>::max());
+            return static_cast<T>(clamped);
+        });
+    } else {
+        std::transform(floatData, floatData + numberOfElements, data,
+                       [](float val) { return static_cast<T>(val); });
+    }
+}
+
+void setExpectedOutputsFromFloat32Results(const std::vector<TestBuffer>& results,
+                                          TestModel* model) {
+    CHECK_EQ(model->referenced.size(), 0u) << "Subgraphs not supported";
+    CHECK_EQ(model->main.operations.size(), 1u) << "Only single-operation graphs are supported";
+
+    for (uint32_t i = 0; i < results.size(); i++) {
+        uint32_t outputIndex = model->main.outputIndexes[i];
+        auto& op = model->main.operands[outputIndex];
+        switch (op.type) {
+            case TestOperandType::TENSOR_FLOAT32:
+            case TestOperandType::FLOAT32:
+            case TestOperandType::TENSOR_BOOL8:
+            case TestOperandType::BOOL:
+            case TestOperandType::INT32:
+            case TestOperandType::UINT32:
+                op.data = results[i];
+                break;
+            case TestOperandType::TENSOR_INT32:
+                if (op.scale != 0.0f) {
+                    setDataFromFloat32Buffer<int32_t>(results[i], &op);
+                } else {
+                    op.data = results[i];
+                }
+                break;
+            case TestOperandType::TENSOR_FLOAT16:
+            case TestOperandType::FLOAT16:
+                setDataFromFloat32Buffer<_Float16>(results[i], &op);
+                break;
+            case TestOperandType::TENSOR_QUANT8_ASYMM:
+                setDataFromFloat32Buffer<uint8_t>(results[i], &op);
+                break;
+            case TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+                setDataFromFloat32Buffer<int8_t>(results[i], &op);
+                break;
+            case TestOperandType::TENSOR_QUANT16_ASYMM:
+                setDataFromFloat32Buffer<uint16_t>(results[i], &op);
+                break;
+            case TestOperandType::TENSOR_QUANT16_SYMM:
+                setDataFromFloat32Buffer<int16_t>(results[i], &op);
+                break;
+            default:
+                CHECK(false) << "OperandType not supported";
+        }
+    }
+}
+
 }  // namespace test_helper
diff --git a/tools/test_generator/test_harness/include/TestHarness.h b/tools/test_generator/test_harness/include/TestHarness.h
index fd5ba8f..c67200c 100644
--- a/tools/test_generator/test_harness/include/TestHarness.h
+++ b/tools/test_generator/test_harness/include/TestHarness.h
@@ -537,6 +537,14 @@
     std::ostream& mOs;
 };
 
+// Convert the test model to an equivalent float32 model. Returns std::nullopt if the conversion
+// is not supported, or if there is no equivalent float32 model.
+std::optional<TestModel> convertToFloat32Model(const TestModel& testModel);
+
+// Used together with convertToFloat32Model. Converts the results computed from the float32 model
+// back to the actual data types of the original model.
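+//
+// A minimal usage sketch, mirroring the flow in TestRandomGraph.cpp (float32Results is a
+// hypothetical name for the outputs of executing the converted model on nnapi-reference):
+//
+//     auto fpModel = convertToFloat32Model(testModel);
+//     if (fpModel.has_value()) {
+//         // ... execute *fpModel on nnapi-reference to produce float32Results ...
+//         setExpectedOutputsFromFloat32Results(float32Results, &testModel);
+//     }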
+void setExpectedOutputsFromFloat32Results(const std::vector<TestBuffer>& results, TestModel* model);
+
 }  // namespace test_helper
 
 #endif  // ANDROID_FRAMEWORKS_ML_NN_TOOLS_TEST_GENERATOR_TEST_HARNESS_TEST_HARNESS_H