Refactor RGG tests to use TestModel utilities.

Before this change, the RGG had its own logic for creating an NDK
model/request and comparing the final results. This CL makes RGG
adopt the new TestModel utilities.

Fixes: 139442221
Bug: 150805665
Test: NNT_static
Test: NNT_static_fuzzing
Change-Id: I041f45026ed271abad4632abd7ec5360f432efda
Merged-In: I041f45026ed271abad4632abd7ec5360f432efda
(cherry picked from commit 53fb979943af6671ff581dcca5cf53a7818d1e74)
diff --git a/runtime/test/Android.bp b/runtime/test/Android.bp
index 47939da..05cb3da 100644
--- a/runtime/test/Android.bp
+++ b/runtime/test/Android.bp
@@ -156,6 +156,7 @@
     name: "NeuralNetworksTest_static_fuzzing",
     defaults: ["NeuralNetworksTest_default_libs"],
     srcs: [
+        "GeneratedTestUtils.cpp",
         "TestNeuralNetworksWrapper.cpp",
         "fuzzing/OperationManager.cpp",
         "fuzzing/RandomGraphGenerator.cpp",
@@ -169,6 +170,7 @@
         "libgmock",
         "libneuralnetworks_static",
         "libneuralnetworks_common",
+        "libneuralnetworks_generated_test_harness",
     ],
     shared_libs: ["libmemunreachable"],
     header_libs: [
diff --git a/runtime/test/fuzzing/RandomGraphGenerator.cpp b/runtime/test/fuzzing/RandomGraphGenerator.cpp
index 3191838..dd516c8 100644
--- a/runtime/test/fuzzing/RandomGraphGenerator.cpp
+++ b/runtime/test/fuzzing/RandomGraphGenerator.cpp
@@ -24,8 +24,10 @@
 #include <set>
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
+#include "TestHarness.h"
 #include "TestNeuralNetworksWrapper.h"
 #include "fuzzing/OperationManager.h"
 #include "fuzzing/RandomGraphGeneratorUtils.h"
@@ -37,6 +39,7 @@
 
 using test_wrapper::Result;
 using test_wrapper::Type;
+using namespace test_helper;
 
 // Construct a RandomOperand from OperandSignature.
 RandomOperand::RandomOperand(const OperandSignature& operand, Type dataType, uint32_t rank)
@@ -205,191 +208,69 @@
     return true;
 }
 
-void RandomGraph::createModel(test_wrapper::Model* model) {
-    NN_FUZZER_LOG << "Create Model";
+static TestOperandLifeTime convertToTestOperandLifeTime(RandomOperandType type) {
+    switch (type) {
+        case RandomOperandType::INPUT:
+            return TestOperandLifeTime::SUBGRAPH_INPUT;
+        case RandomOperandType::OUTPUT:
+            return TestOperandLifeTime::SUBGRAPH_OUTPUT;
+        case RandomOperandType::INTERNAL:
+            return TestOperandLifeTime::TEMPORARY_VARIABLE;
+        case RandomOperandType::CONST:
+            return TestOperandLifeTime::CONSTANT_COPY;
+    }
+}
+
+TestModel RandomGraph::createTestModel() {
+    NN_FUZZER_LOG << "Create Test Model";
+    TestModel testModel;
 
     // Set model operands.
-    std::vector<uint32_t> modelInputs;
-    std::vector<uint32_t> modelOutputs;
     for (auto& operand : mOperands) {
-        // TODO: Model operands are always fully-specified at model construction time.
-        test_wrapper::OperandType type(operand->dataType, operand->getDimensions(), operand->scale,
-                                       operand->zeroPoint);
-        operand->opIndex = model->addOperand(&type);
+        operand->opIndex = testModel.main.operands.size();
+        TestOperand testOperand = {
+                .type = static_cast<TestOperandType>(operand->dataType),
+                .dimensions = operand->getDimensions(),
+                // It is safe to always set numberOfConsumers to 0 here because
+                // this field is not used in NDK.
+                .numberOfConsumers = 0,
+                .scale = operand->scale,
+                .zeroPoint = operand->zeroPoint,
+                .lifetime = convertToTestOperandLifeTime(operand->type),
+                .isIgnored = operand->doNotCheckAccuracy,
+        };
 
-        // For INPUT/OUTPUT, prepare vectors for identifyInputsAndOutputs(...).
-        // For CONST, set operand buffer.
-        if (operand->type == RandomOperandType::INPUT) {
-            operand->ioIndex = modelInputs.size();
-            modelInputs.push_back(operand->opIndex);
-        } else if (operand->type == RandomOperandType::OUTPUT) {
-            operand->ioIndex = modelOutputs.size();
-            modelOutputs.push_back(operand->opIndex);
-        } else if (operand->type == RandomOperandType::CONST) {
-            model->setOperandValue(operand->opIndex, operand->buffer.data(),
-                                   operand->getBufferSize());
+        // Test buffers.
+        if (testOperand.lifetime == TestOperandLifeTime::SUBGRAPH_OUTPUT) {
+            testOperand.data = TestBuffer(operand->getBufferSize());
+        } else if (testOperand.lifetime != TestOperandLifeTime::TEMPORARY_VARIABLE) {
+            testOperand.data = TestBuffer(operand->getBufferSize(), operand->buffer.data());
         }
+
+        // Input/Output indexes.
+        if (testOperand.lifetime == TestOperandLifeTime::SUBGRAPH_INPUT) {
+            testModel.main.inputIndexes.push_back(operand->opIndex);
+        } else if (testOperand.lifetime == TestOperandLifeTime::SUBGRAPH_OUTPUT) {
+            testModel.main.outputIndexes.push_back(operand->opIndex);
+        }
+        testModel.main.operands.push_back(std::move(testOperand));
     }
 
     // Set model operations.
     for (auto& operation : mOperations) {
         NN_FUZZER_LOG << "Operation: " << kOperationNames[static_cast<int32_t>(operation.opType)];
-        std::vector<uint32_t> inputIndices, outputIndices;
+        TestOperation testOperation = {.type = static_cast<TestOperationType>(operation.opType)};
         for (auto& op : operation.inputs) {
             NN_FUZZER_LOG << toString(*op);
-            inputIndices.push_back(op->opIndex);
+            testOperation.inputs.push_back(op->opIndex);
         }
         for (auto& op : operation.outputs) {
             NN_FUZZER_LOG << toString(*op);
-            outputIndices.push_back(op->opIndex);
+            testOperation.outputs.push_back(op->opIndex);
         }
-        model->addOperation(operation.opType, inputIndices, outputIndices);
+        testModel.main.operations.push_back(std::move(testOperation));
     }
-
-    // Set model inputs and outputs.
-    model->identifyInputsAndOutputs(modelInputs, modelOutputs);
-}
-
-void RandomGraph::createRequest(test_wrapper::Execution* execution,
-                                std::vector<OperandBuffer>* buffers) {
-    NN_FUZZER_LOG << "Create Request";
-    if (buffers != nullptr) buffers->clear();
-    for (const auto& operand : mOperands) {
-        if (operand->type == RandomOperandType::INPUT) {
-            EXPECT_EQ(execution->setInput(operand->ioIndex, operand->buffer.data(),
-                                          operand->getBufferSize(), nullptr),
-                      Result::NO_ERROR);
-        } else if (operand->type == RandomOperandType::OUTPUT) {
-            if (buffers == nullptr) {
-                EXPECT_EQ(execution->setOutput(operand->ioIndex, operand->buffer.data(),
-                                               operand->getBufferSize(), nullptr),
-                          Result::NO_ERROR);
-            } else {
-                // The order of the output buffers corresponds to the order in mOperands.
-                buffers->emplace_back(operand->buffer.size());
-                EXPECT_EQ(execution->setOutput(operand->ioIndex, buffers->back().data(),
-                                               operand->getBufferSize(), nullptr),
-                          Result::NO_ERROR);
-            }
-        }
-    }
-}
-
-// Check if the actual results meet the accuracy criterion.
-constexpr uint32_t kMaxNumberOfPrintedErrors = 5;
-template <typename T>
-void expectNear(const RandomOperand& op, const OperandBuffer& test,
-                const AccuracyCriterion& criterion) {
-    constexpr uint32_t kMinNumberOfElementsToTestBiasMSE = 10;
-    const T* actualBuffer = reinterpret_cast<const T*>(test.data());
-    const T* expectedBuffer = reinterpret_cast<const T*>(op.buffer.data());
-    uint32_t len = op.getNumberOfElements();
-    uint32_t numSkip = 0, numErrors = 0;
-    double bias = 0.0f, mse = 0.0f;
-    for (uint32_t i = 0; i < len; i++) {
-        SCOPED_TRACE(testing::Message() << "When comparing element " << i);
-
-        // Compare all data types in double for precision and signed arithmetic.
-        double actual = static_cast<double>(actualBuffer[i]);
-        double expected = static_cast<double>(expectedBuffer[i]);
-        double tolerableRange = criterion.atol + criterion.rtol * std::fabs(expected);
-
-        // Skip invalid floating point values.
-        if (std::isnan(expected) || std::isinf(expected) || std::isnan(actual) ||
-            std::isinf(actual) || std::fabs(expected) > 1e3) {
-            numSkip++;
-            continue;
-        }
-
-        // Accumulate bias and MSE. Use relative bias and MSE for floating point values.
-        double diff = actual - expected;
-        if constexpr (nnIsFloat<T>) {
-            diff /= std::max(1.0, std::abs(expected));
-        }
-        bias += diff;
-        mse += diff * diff;
-
-        // Print at most kMaxNumberOfPrintedErrors errors by EXPECT_NEAR.
-        if (numErrors < kMaxNumberOfPrintedErrors) EXPECT_NEAR(expected, actual, tolerableRange);
-        if (!(std::fabs(diff) <= tolerableRange)) numErrors++;
-    }
-    EXPECT_EQ(numErrors, 0u);
-
-    // Test bias and MSE.
-    if (len < numSkip + kMinNumberOfElementsToTestBiasMSE) return;
-    bias /= static_cast<double>(len - numSkip);
-    mse /= static_cast<double>(len - numSkip);
-    EXPECT_LE(std::fabs(bias), criterion.bias);
-    EXPECT_LE(mse, criterion.mse);
-}
-
-// For boolean values, we expect the number of mismatches does not exceed a certain ratio.
-void expectBooleanNearlyEqual(const RandomOperand& op, const OperandBuffer& test,
-                              float allowedErrorRatio) {
-    const bool8* actual = reinterpret_cast<const bool8*>(test.data());
-    const bool8* expected = reinterpret_cast<const bool8*>(op.buffer.data());
-    uint32_t len = op.getNumberOfElements();
-    uint32_t numErrors = 0;
-    std::stringstream errorMsg;
-    for (uint32_t i = 0; i < len; i++) {
-        if (expected[i] != actual[i]) {
-            if (numErrors < kMaxNumberOfPrintedErrors)
-                errorMsg << "    Expected: " << expected[i] << ", actual: " << actual[i]
-                         << ", when comparing element " << i << "\n";
-            numErrors++;
-        }
-    }
-    // When |len| is small, the allowedErrorCount will intentionally ceil at 1, which allows for
-    // greater tolerance.
-    uint32_t allowedErrorCount = static_cast<uint32_t>(std::ceil(allowedErrorRatio * len));
-    EXPECT_LE(numErrors, allowedErrorCount) << errorMsg.str();
-}
-
-// TODO(b/139442221): Reduce code duplication with
-//                    nn/tools/test_generator/test_harness/TestHarness.cpp.
-void RandomGraph::checkResults(const std::vector<OperandBuffer>& buffers,
-                               const AccuracyCriteria& criteria) const {
-    NN_FUZZER_LOG << "Check Results";
-    // Make sure to keep the same order as the buffers are created.
-    int i = 0;
-    for (const auto& op : mOperands) {
-        if (op->type == RandomOperandType::OUTPUT) {
-            SCOPED_TRACE(testing::Message()
-                         << "When comparing output " << op->ioIndex << " (op" << op->opIndex << ")"
-                         << " of type " << toString(op->dataType));
-            if (!op->doNotCheckAccuracy) {
-                switch (op->dataType) {
-                    case Type::TENSOR_FLOAT32:
-                        expectNear<float>(*op, buffers[i], criteria.float32);
-                        break;
-                    case Type::TENSOR_FLOAT16:
-                        expectNear<_Float16>(*op, buffers[i], criteria.float16);
-                        break;
-                    case Type::TENSOR_INT32:
-                        expectNear<int32_t>(*op, buffers[i], criteria.int32);
-                        break;
-                    case Type::TENSOR_QUANT8_ASYMM:
-                        expectNear<uint8_t>(*op, buffers[i], criteria.quant8Asymm);
-                        break;
-                    case Type::TENSOR_QUANT8_SYMM:
-                        expectNear<int8_t>(*op, buffers[i], criteria.quant8Symm);
-                        break;
-                    case Type::TENSOR_QUANT16_ASYMM:
-                        expectNear<uint16_t>(*op, buffers[i], criteria.quant16Asymm);
-                        break;
-                    case Type::TENSOR_QUANT16_SYMM:
-                        expectNear<int16_t>(*op, buffers[i], criteria.quant16Symm);
-                        break;
-                    case Type::TENSOR_BOOL8:
-                        expectBooleanNearlyEqual(*op, buffers[i], /*allowedErrorRatio=*/0.01);
-                        break;
-                    default:
-                        NN_FUZZER_CHECK(false) << "Data type not supported.";
-                }
-            }
-            i++;
-        }
-    }
+    return testModel;
 }
 
 void RandomGraph::dumpSpecFile(std::string filename, std::string testname = "") {
diff --git a/runtime/test/fuzzing/RandomGraphGenerator.h b/runtime/test/fuzzing/RandomGraphGenerator.h
index 47c6d3e..ffe177f 100644
--- a/runtime/test/fuzzing/RandomGraphGenerator.h
+++ b/runtime/test/fuzzing/RandomGraphGenerator.h
@@ -17,9 +17,11 @@
 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TEST_FUZZING_RANDOM_GRAPH_GENERATOR_H
 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TEST_FUZZING_RANDOM_GRAPH_GENERATOR_H
 
+#include <memory>
 #include <string>
 #include <vector>
 
+#include "TestHarness.h"
 #include "TestNeuralNetworksWrapper.h"
 #include "fuzzing/RandomVariable.h"
 
@@ -108,36 +110,6 @@
     RandomOperation(const OperationSignature& operation);
 };
 
-struct AccuracyCriterion {
-    // We expect the driver results to be unbiased.
-    // Formula: abs(sum_{i}(diff) / sum(1)) <= bias, where
-    // * fixed point: diff = actual - expected
-    // * floating point: diff = (actual - expected) / max(1, abs(expected))
-    float bias = std::numeric_limits<float>::max();
-
-    // Set the threshold on Mean Square Error (MSE).
-    // Formula: sum_{i}(diff ^ 2) / sum(1) <= mse
-    float mse = std::numeric_limits<float>::max();
-
-    // We also set accuracy thresholds on each element to detect any particular edge cases that may
-    // be shadowed in bias or MSE. We use the similar approach as our CTS unit tests, but with much
-    // relaxed criterion.
-    // Formula: abs(actual - expected) <= atol + rtol * abs(expected)
-    //   where atol stands for Absolute TOLerance and rtol for Relative TOLerance.
-    float atol = 0.0f;
-    float rtol = 0.0f;
-};
-
-struct AccuracyCriteria {
-    AccuracyCriterion float32;
-    AccuracyCriterion float16;
-    AccuracyCriterion int32;
-    AccuracyCriterion quant8Asymm;
-    AccuracyCriterion quant8Symm;
-    AccuracyCriterion quant16Asymm;
-    AccuracyCriterion quant16Symm;
-};
-
 // The main interface of the random graph generator.
 class RandomGraph {
    public:
@@ -146,18 +118,9 @@
     // Generate a random graph with numOperations and dimensionRange from a seed.
     bool generate(uint32_t seed, uint32_t numOperations, uint32_t dimensionRange);
 
-    // Create a NDK model from the random graph.
-    void createModel(test_wrapper::Model* model);
-
-    // Set the input/output buffers to an NDK execution object. The input buffer resides in
-    // RandomOperand.buffer, the output buffer is either provided by "buffers" argument, or set
-    // buffers to nullptr to use RandomOperand.buffer to record reference result.
-    void createRequest(test_wrapper::Execution* execution,
-                       std::vector<OperandBuffer>* buffers = nullptr);
-
-    // Check if the results in buffers meet the given accuracy criteria.
-    void checkResults(const std::vector<OperandBuffer>& buffers,
-                      const AccuracyCriteria& criteria) const;
+    // Create a test model of the generated graph. The operands will always have fully-specified
+    // dimensions. The output buffers are only allocated but not initialized.
+    test_helper::TestModel createTestModel();
 
     // Dump the generated random graph to a spec file for debugging and visualization purpose.
     void dumpSpecFile(std::string filename, std::string testname);
diff --git a/runtime/test/fuzzing/TestRandomGraph.cpp b/runtime/test/fuzzing/TestRandomGraph.cpp
index 98fd3a5..32ebd75 100644
--- a/runtime/test/fuzzing/TestRandomGraph.cpp
+++ b/runtime/test/fuzzing/TestRandomGraph.cpp
@@ -22,6 +22,8 @@
 #include <set>
 #include <string>
 
+#include "GeneratedTestUtils.h"
+#include "TestHarness.h"
 #include "TestNeuralNetworksWrapper.h"
 #include "fuzzing/OperationManager.h"
 #include "fuzzing/RandomGraphGenerator.h"
@@ -46,6 +48,7 @@
 namespace nn {
 namespace fuzzing_test {
 
+using namespace test_helper;
 using test_wrapper::Result;
 constexpr char kRefDeviceName[] = "nnapi-reference";
 
@@ -199,11 +202,10 @@
         };
         if (kDisabledTests.find(mTestName) != kDisabledTests.end()) return true;
         if (featureLevel >= __ANDROID_API_Q__) return false;
-        const auto& operations = mGraph.getOperations();
-        for (const auto& op : operations) {
+        for (const auto& op : mTestModel.main.operations) {
             // Skip if testing BATCH_TO_SPACE_ND with batch dimension == 1.
-            if (op.opType == ANEURALNETWORKS_BATCH_TO_SPACE_ND &&
-                op.inputs[0]->dimensions[0].getValue() == 1)
+            if (op.type == TestOperationType::BATCH_TO_SPACE_ND &&
+                mTestModel.main.operands[op.inputs[0]].dimensions[0] == 1)
                 return true;
         }
         return false;
@@ -255,12 +257,8 @@
 
         // Create request.
         test_wrapper::Execution execution(&compilation);
-        std::vector<OperandBuffer> outputs;
-        if (isRef) {
-            mGraph.createRequest(&execution);
-        } else {
-            mGraph.createRequest(&execution, &outputs);
-        }
+        std::vector<TestBuffer> outputs;
+        generated_tests::createRequest(mTestModel, &execution, &outputs);
 
         // Compute result.
         Result executeReturn = execution.compute();
@@ -273,14 +271,23 @@
             return;
         }
         ASSERT_EQ(executeReturn, Result::NO_ERROR);
+
+        // Record the execution results as golden values.
+        if (isRef) {
+            for (uint32_t i = 0; i < outputs.size(); i++) {
+                auto outputIndex = mTestModel.main.outputIndexes[i];
+                mTestModel.main.operands[outputIndex].data = outputs[i];
+            }
+        }
+
         if (featureLevel >= __ANDROID_API_Q__ && !isRef) {
-            mGraph.checkResults(outputs, mCriteria);
+            checkResults(mTestModel, outputs, mCriteria);
         }
     }
 
     // Compile and execute the generated graph normally (i.e., allow runtime to
     // distribute across devices).
-    void computeAndVerifyResults(const test_wrapper::Model* model, bool checkResults) {
+    void computeAndVerifyResults(const test_wrapper::Model* model, bool shouldCheckResults) {
         // Because we're not using the introspection/control API, the CpuDevice
         // is available as a fallback, and hence we assume that compilation and
         // execution will succeed.
@@ -291,13 +298,13 @@
 
         // Create request.
         test_wrapper::Execution execution(&compilation);
-        std::vector<OperandBuffer> outputs;
-        mGraph.createRequest(&execution, &outputs);
+        std::vector<TestBuffer> outputs;
+        generated_tests::createRequest(mTestModel, &execution, &outputs);
 
         // Compute and verify result.
         ASSERT_EQ(execution.compute(), Result::NO_ERROR);
-        if (checkResults) {
-            mGraph.checkResults(outputs, mCriteria);
+        if (shouldCheckResults) {
+            checkResults(mTestModel, outputs, mCriteria);
         }
     }
 
@@ -307,8 +314,10 @@
         ASSERT_TRUE(mGraph.generate(kSeed, numOperations, dimensionRange));
 
         // Create a model from the random graph.
-        test_wrapper::Model model;
-        mGraph.createModel(&model);
+        mTestModel = mGraph.createTestModel();
+
+        generated_tests::GeneratedModel model;
+        generated_tests::createModel(mTestModel, &model);
         ASSERT_TRUE(model.isValid());
         ASSERT_EQ(model.finish(), Result::NO_ERROR);
 
@@ -358,6 +367,7 @@
     const uint32_t kSeed = GetParam();
     std::string mTestName;
     RandomGraph mGraph;
+    TestModel mTestModel;
     AccuracyCriteria mCriteria;
 
     static int64_t mStandardDevicesFeatureLevel;  // minimum across all devices
diff --git a/tools/test_generator/test_harness/TestHarness.cpp b/tools/test_generator/test_harness/TestHarness.cpp
index 98ab524..279973f 100644
--- a/tools/test_generator/test_harness/TestHarness.cpp
+++ b/tools/test_generator/test_harness/TestHarness.cpp
@@ -23,6 +23,8 @@
 #include <algorithm>
 #include <cmath>
 #include <functional>
+#include <limits>
+#include <map>
 #include <numeric>
 #include <string>
 #include <vector>
@@ -31,6 +33,9 @@
 
 namespace {
 
+template <typename T>
+constexpr bool nnIsFloat = std::is_floating_point_v<T> || std::is_same_v<T, _Float16>;
+
 constexpr uint32_t kMaxNumberOfPrintedErrors = 10;
 
 // TODO(b/139442217): Allow passing accuracy criteria from spec.
@@ -59,15 +64,32 @@
 
 // Check if the actual results meet the accuracy criterion.
 template <typename T>
-void expectNear(const TestOperand& op, const TestBuffer& result, double atol, double rtol) {
+void expectNear(const TestOperand& op, const TestBuffer& result,
+                const AccuracyCriterion& criterion) {
+    constexpr uint32_t kMinNumberOfElementsToTestBiasMSE = 10;
     const T* actualBuffer = result.get<T>();
     const T* expectedBuffer = op.data.get<T>();
-    uint32_t len = getNumberOfElements(op), numErrors = 0;
+    uint32_t len = getNumberOfElements(op), numErrors = 0, numSkip = 0;
+    double bias = 0.0f, mse = 0.0f;
     for (uint32_t i = 0; i < len; i++) {
         // Compare all data types in double for precision and signed arithmetic.
         double actual = static_cast<double>(actualBuffer[i]);
         double expected = static_cast<double>(expectedBuffer[i]);
-        double tolerableRange = atol + rtol * std::fabs(expected);
+        double tolerableRange = criterion.atol + criterion.rtol * std::fabs(expected);
+
+        // Skip invalid floating point values.
+        if (std::isnan(expected) || std::isinf(expected) || std::fabs(expected) > 1e3) {
+            numSkip++;
+            continue;
+        }
+
+        // Accumulate bias and MSE. Use relative bias and MSE for floating point values.
+        double diff = actual - expected;
+        if constexpr (nnIsFloat<T>) {
+            diff /= std::max(1.0, std::abs(expected));
+        }
+        bias += diff;
+        mse += diff * diff;
 
         // Print at most kMaxNumberOfPrintedErrors errors by EXPECT_NEAR.
         if (numErrors < kMaxNumberOfPrintedErrors) {
@@ -76,24 +98,34 @@
         if (std::fabs(actual - expected) > tolerableRange) numErrors++;
     }
     EXPECT_EQ(numErrors, 0u);
+
+    // Test bias and MSE.
+    if (len < numSkip + kMinNumberOfElementsToTestBiasMSE) return;
+    bias /= static_cast<double>(len - numSkip);
+    mse /= static_cast<double>(len - numSkip);
+    EXPECT_LE(std::fabs(bias), criterion.bias);
+    EXPECT_LE(mse, criterion.mse);
 }
 
-// For boolean values, we expect exact match.
-void expectBooleanEqual(const TestOperand& op, const TestBuffer& result) {
+// For boolean values, we expect the number of mismatches does not exceed a certain ratio.
+void expectBooleanNearlyEqual(const TestOperand& op, const TestBuffer& result,
+                              float allowedErrorRatio) {
     const bool8* actualBuffer = result.get<bool8>();
     const bool8* expectedBuffer = op.data.get<bool8>();
     uint32_t len = getNumberOfElements(op), numErrors = 0;
+    std::stringstream errorMsg;
     for (uint32_t i = 0; i < len; i++) {
-        bool actual = static_cast<bool>(actualBuffer[i]);
-        bool expected = static_cast<bool>(expectedBuffer[i]);
-
-        // Print at most kMaxNumberOfPrintedErrors errors by EXPECT_NEAR.
-        if (numErrors < kMaxNumberOfPrintedErrors) {
-            EXPECT_EQ(expected, actual) << "When comparing element " << i;
+        if (expectedBuffer[i] != actualBuffer[i]) {
+            if (numErrors < kMaxNumberOfPrintedErrors)
+                errorMsg << "    Expected: " << expectedBuffer[i] << ", actual: " << actualBuffer[i]
+                         << ", when comparing element " << i << "\n";
+            numErrors++;
         }
-        if (expected != actual) numErrors++;
     }
-    EXPECT_EQ(numErrors, 0u);
+    // When |len| is small, the allowedErrorCount will intentionally ceil at 1, which allows for
+    // greater tolerance.
+    uint32_t allowedErrorCount = static_cast<uint32_t>(std::ceil(allowedErrorRatio * len));
+    EXPECT_LE(numErrors, allowedErrorCount) << errorMsg.str();
 }
 
 // Calculates the expected probability from the unnormalized log-probability of
@@ -157,6 +189,50 @@
 
 }  // namespace
 
+void checkResults(const TestModel& model, const std::vector<TestBuffer>& buffers,
+                  const AccuracyCriteria& criteria) {
+    ASSERT_EQ(model.main.outputIndexes.size(), buffers.size());
+    for (uint32_t i = 0; i < model.main.outputIndexes.size(); i++) {
+        SCOPED_TRACE(testing::Message() << "When comparing output " << i);
+        const auto& operand = model.main.operands[model.main.outputIndexes[i]];
+        const auto& result = buffers[i];
+        if (operand.isIgnored) continue;
+
+        switch (operand.type) {
+            case TestOperandType::TENSOR_FLOAT32:
+                expectNear<float>(operand, result, criteria.float32);
+                break;
+            case TestOperandType::TENSOR_FLOAT16:
+                expectNear<_Float16>(operand, result, criteria.float16);
+                break;
+            case TestOperandType::TENSOR_INT32:
+            case TestOperandType::INT32:
+                expectNear<int32_t>(operand, result, criteria.int32);
+                break;
+            case TestOperandType::TENSOR_QUANT8_ASYMM:
+                expectNear<uint8_t>(operand, result, criteria.quant8Asymm);
+                break;
+            case TestOperandType::TENSOR_QUANT8_SYMM:
+                expectNear<int8_t>(operand, result, criteria.quant8Symm);
+                break;
+            case TestOperandType::TENSOR_QUANT16_ASYMM:
+                expectNear<uint16_t>(operand, result, criteria.quant16Asymm);
+                break;
+            case TestOperandType::TENSOR_QUANT16_SYMM:
+                expectNear<int16_t>(operand, result, criteria.quant16Symm);
+                break;
+            case TestOperandType::TENSOR_BOOL8:
+                expectBooleanNearlyEqual(operand, result, criteria.bool8AllowedErrorRatio);
+                break;
+            case TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+                expectNear<int8_t>(operand, result, criteria.quant8AsymmSigned);
+                break;
+            default:
+                FAIL() << "Data type not supported.";
+        }
+    }
+}
+
 void checkResults(const TestModel& model, const std::vector<TestBuffer>& buffers) {
     // For RANDOM_MULTINOMIAL test only.
     if (model.expectedMultinomialDistributionTolerance > 0.0f) {
@@ -164,13 +240,29 @@
         return;
     }
 
-    // Decide the tolerable range.
+    // Decide the default tolerable range.
     //
     // For floating-point models, we use the relaxed precision if either
     // - relaxed computation flag is set
     // - the model has at least one TENSOR_FLOAT16 operand
-    double fpAtol = 1e-5;
-    double fpRtol = 5.0f * 1.1920928955078125e-7;
+    //
+    // The bias and MSE criteria are implicitly set to the maximum -- we do not enforce these
+    // criteria in normal generated tests.
+    //
+    // TODO: Adjust the error limit based on testing.
+    //
+    AccuracyCriteria criteria = {
+            // The relative tolerance is 5ULP of FP32.
+            .float32 = {.atol = 1e-5, .rtol = 5.0f * 1.1920928955078125e-7},
+            // Both the absolute and relative tolerance are 5ULP of FP16.
+            .float16 = {.atol = 5.0f * 0.0009765625, .rtol = 5.0f * 0.0009765625},
+            .int32 = {.atol = 1},
+            .quant8Asymm = {.atol = 1},
+            .quant8Symm = {.atol = 1},
+            .quant16Asymm = {.atol = 1},
+            .quant16Symm = {.atol = 1},
+            .bool8AllowedErrorRatio = 0.0f,
+    };
     bool hasFloat16Inputs = false;
     model.forEachSubgraph([&hasFloat16Inputs](const TestSubgraph& subgraph) {
         if (!hasFloat16Inputs) {
@@ -181,56 +273,14 @@
         }
     });
     if (model.isRelaxed || hasFloat16Inputs) {
-        // TODO: Adjust the error limit based on testing.
-        // If in relaxed mode, set the absolute tolerance to be 5ULP of FP16.
-        fpAtol = 5.0f * 0.0009765625;
-        // Set the relative tolerance to be 5ULP of the corresponding FP precision.
-        fpRtol = 5.0f * 0.0009765625;
+        criteria.float32 = criteria.float16;
     }
     const double quant8AllowedError = getQuant8AllowedError();
+    criteria.quant8Asymm.atol = quant8AllowedError;
+    criteria.quant8AsymmSigned.atol = quant8AllowedError;
+    criteria.quant8Symm.atol = quant8AllowedError;
 
-    ASSERT_EQ(model.main.outputIndexes.size(), buffers.size());
-    for (uint32_t i = 0; i < model.main.outputIndexes.size(); i++) {
-        SCOPED_TRACE(testing::Message() << "When comparing output " << i);
-        const auto& operand = model.main.operands[model.main.outputIndexes[i]];
-        const auto& result = buffers[i];
-        if (operand.isIgnored) continue;
-
-        switch (operand.type) {
-            case TestOperandType::TENSOR_FLOAT32:
-                expectNear<float>(operand, result, fpAtol, fpRtol);
-                break;
-            case TestOperandType::TENSOR_FLOAT16:
-                expectNear<_Float16>(operand, result, fpAtol, fpRtol);
-                break;
-            case TestOperandType::TENSOR_INT32:
-                expectNear<int32_t>(operand, result, 0, 0);
-                break;
-            case TestOperandType::TENSOR_QUANT8_ASYMM:
-                expectNear<uint8_t>(operand, result, quant8AllowedError, 0);
-                break;
-            case TestOperandType::TENSOR_QUANT8_SYMM:
-                expectNear<int8_t>(operand, result, quant8AllowedError, 0);
-                break;
-            case TestOperandType::TENSOR_QUANT16_ASYMM:
-                expectNear<uint16_t>(operand, result, 1, 0);
-                break;
-            case TestOperandType::TENSOR_QUANT16_SYMM:
-                expectNear<int16_t>(operand, result, 1, 0);
-                break;
-            case TestOperandType::TENSOR_BOOL8:
-                expectBooleanEqual(operand, result);
-                break;
-            case TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED:
-                expectNear<int8_t>(operand, result, quant8AllowedError, 0);
-                break;
-            case TestOperandType::INT32:
-                expectNear<int32_t>(operand, result, 0, 0);
-                break;
-            default:
-                FAIL() << "Data type not supported.";
-        }
-    }
+    checkResults(model, buffers, criteria);
 }
 
 TestModel convertQuant8AsymmOperandsToSigned(const TestModel& testModel) {
diff --git a/tools/test_generator/test_harness/include/TestHarness.h b/tools/test_generator/test_harness/include/TestHarness.h
index bd6f1d2..cc8c5b0 100644
--- a/tools/test_generator/test_harness/include/TestHarness.h
+++ b/tools/test_generator/test_harness/include/TestHarness.h
@@ -27,6 +27,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <functional>
+#include <limits>
 #include <map>
 #include <memory>
 #include <random>
@@ -454,10 +455,44 @@
     std::map<std::string, const TestModel*> mTestModels;
 };
 
+struct AccuracyCriterion {
+    // We expect the driver results to be unbiased.
+    // Formula: abs(sum_{i}(diff) / sum(1)) <= bias, where
+    // * fixed point: diff = actual - expected
+    // * floating point: diff = (actual - expected) / max(1, abs(expected))
+    float bias = std::numeric_limits<float>::max();
+
+    // Set the threshold on Mean Square Error (MSE).
+    // Formula: sum_{i}(diff ^ 2) / sum(1) <= mse
+    float mse = std::numeric_limits<float>::max();
+
+    // We also set accuracy thresholds on each element to detect any particular edge cases that may
+    // be shadowed in bias or MSE. We use the similar approach as our CTS unit tests, but with much
+    // relaxed criterion.
+    // Formula: abs(actual - expected) <= atol + rtol * abs(expected)
+    //   where atol stands for Absolute TOLerance and rtol for Relative TOLerance.
+    float atol = 0.0f;
+    float rtol = 0.0f;
+};
+
+struct AccuracyCriteria {
+    AccuracyCriterion float32;
+    AccuracyCriterion float16;
+    AccuracyCriterion int32;
+    AccuracyCriterion quant8Asymm;
+    AccuracyCriterion quant8AsymmSigned;
+    AccuracyCriterion quant8Symm;
+    AccuracyCriterion quant16Asymm;
+    AccuracyCriterion quant16Symm;
+    float bool8AllowedErrorRatio = 0.1f;
+};
+
 // Check the output results against the expected values in test model by calling
 // GTEST_ASSERT/EXPECT. The index of the results corresponds to the index in
 // model.main.outputIndexes. E.g., results[i] corresponds to model.main.outputIndexes[i].
 void checkResults(const TestModel& model, const std::vector<TestBuffer>& results);
+void checkResults(const TestModel& model, const std::vector<TestBuffer>& results,
+                  const AccuracyCriteria& criteria);
 
 TestModel convertQuant8AsymmOperandsToSigned(const TestModel& testModel);