Refactor RGG tests to use TestModel utilities.
Before this change, the RGG had its own logic for creating an NDK
model/request and comparing the final results. This CL makes the RGG
adopt the new TestModel utilities.
Fixes: 139442221
Bug: 150805665
Test: NNT_static
Test: NNT_static_fuzzing
Change-Id: I041f45026ed271abad4632abd7ec5360f432efda
Merged-In: I041f45026ed271abad4632abd7ec5360f432efda
(cherry picked from commit 53fb979943af6671ff581dcca5cf53a7818d1e74)
diff --git a/runtime/test/Android.bp b/runtime/test/Android.bp
index 47939da..05cb3da 100644
--- a/runtime/test/Android.bp
+++ b/runtime/test/Android.bp
@@ -156,6 +156,7 @@
name: "NeuralNetworksTest_static_fuzzing",
defaults: ["NeuralNetworksTest_default_libs"],
srcs: [
+ "GeneratedTestUtils.cpp",
"TestNeuralNetworksWrapper.cpp",
"fuzzing/OperationManager.cpp",
"fuzzing/RandomGraphGenerator.cpp",
@@ -169,6 +170,7 @@
"libgmock",
"libneuralnetworks_static",
"libneuralnetworks_common",
+ "libneuralnetworks_generated_test_harness",
],
shared_libs: ["libmemunreachable"],
header_libs: [
diff --git a/runtime/test/fuzzing/RandomGraphGenerator.cpp b/runtime/test/fuzzing/RandomGraphGenerator.cpp
index 3191838..dd516c8 100644
--- a/runtime/test/fuzzing/RandomGraphGenerator.cpp
+++ b/runtime/test/fuzzing/RandomGraphGenerator.cpp
@@ -24,8 +24,10 @@
#include <set>
#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
+#include "TestHarness.h"
#include "TestNeuralNetworksWrapper.h"
#include "fuzzing/OperationManager.h"
#include "fuzzing/RandomGraphGeneratorUtils.h"
@@ -37,6 +39,7 @@
using test_wrapper::Result;
using test_wrapper::Type;
+using namespace test_helper;
// Construct a RandomOperand from OperandSignature.
RandomOperand::RandomOperand(const OperandSignature& operand, Type dataType, uint32_t rank)
@@ -205,191 +208,69 @@
return true;
}
-void RandomGraph::createModel(test_wrapper::Model* model) {
- NN_FUZZER_LOG << "Create Model";
+static TestOperandLifeTime convertToTestOperandLifeTime(RandomOperandType type) {
+ switch (type) {
+ case RandomOperandType::INPUT:
+ return TestOperandLifeTime::SUBGRAPH_INPUT;
+ case RandomOperandType::OUTPUT:
+ return TestOperandLifeTime::SUBGRAPH_OUTPUT;
+ case RandomOperandType::INTERNAL:
+ return TestOperandLifeTime::TEMPORARY_VARIABLE;
+ case RandomOperandType::CONST:
+ return TestOperandLifeTime::CONSTANT_COPY;
+ }
+}
+
+TestModel RandomGraph::createTestModel() {
+ NN_FUZZER_LOG << "Create Test Model";
+ TestModel testModel;
// Set model operands.
- std::vector<uint32_t> modelInputs;
- std::vector<uint32_t> modelOutputs;
for (auto& operand : mOperands) {
- // TODO: Model operands are always fully-specified at model construction time.
- test_wrapper::OperandType type(operand->dataType, operand->getDimensions(), operand->scale,
- operand->zeroPoint);
- operand->opIndex = model->addOperand(&type);
+ operand->opIndex = testModel.main.operands.size();
+ TestOperand testOperand = {
+ .type = static_cast<TestOperandType>(operand->dataType),
+ .dimensions = operand->getDimensions(),
+ // It is safe to always set numberOfConsumers to 0 here because
+ // this field is not used in NDK.
+ .numberOfConsumers = 0,
+ .scale = operand->scale,
+ .zeroPoint = operand->zeroPoint,
+ .lifetime = convertToTestOperandLifeTime(operand->type),
+ .isIgnored = operand->doNotCheckAccuracy,
+ };
- // For INPUT/OUTPUT, prepare vectors for identifyInputsAndOutputs(...).
- // For CONST, set operand buffer.
- if (operand->type == RandomOperandType::INPUT) {
- operand->ioIndex = modelInputs.size();
- modelInputs.push_back(operand->opIndex);
- } else if (operand->type == RandomOperandType::OUTPUT) {
- operand->ioIndex = modelOutputs.size();
- modelOutputs.push_back(operand->opIndex);
- } else if (operand->type == RandomOperandType::CONST) {
- model->setOperandValue(operand->opIndex, operand->buffer.data(),
- operand->getBufferSize());
+ // Test buffers.
+ if (testOperand.lifetime == TestOperandLifeTime::SUBGRAPH_OUTPUT) {
+ testOperand.data = TestBuffer(operand->getBufferSize());
+ } else if (testOperand.lifetime != TestOperandLifeTime::TEMPORARY_VARIABLE) {
+ testOperand.data = TestBuffer(operand->getBufferSize(), operand->buffer.data());
}
+
+ // Input/Output indexes.
+ if (testOperand.lifetime == TestOperandLifeTime::SUBGRAPH_INPUT) {
+ testModel.main.inputIndexes.push_back(operand->opIndex);
+ } else if (testOperand.lifetime == TestOperandLifeTime::SUBGRAPH_OUTPUT) {
+ testModel.main.outputIndexes.push_back(operand->opIndex);
+ }
+ testModel.main.operands.push_back(std::move(testOperand));
}
// Set model operations.
for (auto& operation : mOperations) {
NN_FUZZER_LOG << "Operation: " << kOperationNames[static_cast<int32_t>(operation.opType)];
- std::vector<uint32_t> inputIndices, outputIndices;
+ TestOperation testOperation = {.type = static_cast<TestOperationType>(operation.opType)};
for (auto& op : operation.inputs) {
NN_FUZZER_LOG << toString(*op);
- inputIndices.push_back(op->opIndex);
+ testOperation.inputs.push_back(op->opIndex);
}
for (auto& op : operation.outputs) {
NN_FUZZER_LOG << toString(*op);
- outputIndices.push_back(op->opIndex);
+ testOperation.outputs.push_back(op->opIndex);
}
- model->addOperation(operation.opType, inputIndices, outputIndices);
+ testModel.main.operations.push_back(std::move(testOperation));
}
-
- // Set model inputs and outputs.
- model->identifyInputsAndOutputs(modelInputs, modelOutputs);
-}
-
-void RandomGraph::createRequest(test_wrapper::Execution* execution,
- std::vector<OperandBuffer>* buffers) {
- NN_FUZZER_LOG << "Create Request";
- if (buffers != nullptr) buffers->clear();
- for (const auto& operand : mOperands) {
- if (operand->type == RandomOperandType::INPUT) {
- EXPECT_EQ(execution->setInput(operand->ioIndex, operand->buffer.data(),
- operand->getBufferSize(), nullptr),
- Result::NO_ERROR);
- } else if (operand->type == RandomOperandType::OUTPUT) {
- if (buffers == nullptr) {
- EXPECT_EQ(execution->setOutput(operand->ioIndex, operand->buffer.data(),
- operand->getBufferSize(), nullptr),
- Result::NO_ERROR);
- } else {
- // The order of the output buffers corresponds to the order in mOperands.
- buffers->emplace_back(operand->buffer.size());
- EXPECT_EQ(execution->setOutput(operand->ioIndex, buffers->back().data(),
- operand->getBufferSize(), nullptr),
- Result::NO_ERROR);
- }
- }
- }
-}
-
-// Check if the actual results meet the accuracy criterion.
-constexpr uint32_t kMaxNumberOfPrintedErrors = 5;
-template <typename T>
-void expectNear(const RandomOperand& op, const OperandBuffer& test,
- const AccuracyCriterion& criterion) {
- constexpr uint32_t kMinNumberOfElementsToTestBiasMSE = 10;
- const T* actualBuffer = reinterpret_cast<const T*>(test.data());
- const T* expectedBuffer = reinterpret_cast<const T*>(op.buffer.data());
- uint32_t len = op.getNumberOfElements();
- uint32_t numSkip = 0, numErrors = 0;
- double bias = 0.0f, mse = 0.0f;
- for (uint32_t i = 0; i < len; i++) {
- SCOPED_TRACE(testing::Message() << "When comparing element " << i);
-
- // Compare all data types in double for precision and signed arithmetic.
- double actual = static_cast<double>(actualBuffer[i]);
- double expected = static_cast<double>(expectedBuffer[i]);
- double tolerableRange = criterion.atol + criterion.rtol * std::fabs(expected);
-
- // Skip invalid floating point values.
- if (std::isnan(expected) || std::isinf(expected) || std::isnan(actual) ||
- std::isinf(actual) || std::fabs(expected) > 1e3) {
- numSkip++;
- continue;
- }
-
- // Accumulate bias and MSE. Use relative bias and MSE for floating point values.
- double diff = actual - expected;
- if constexpr (nnIsFloat<T>) {
- diff /= std::max(1.0, std::abs(expected));
- }
- bias += diff;
- mse += diff * diff;
-
- // Print at most kMaxNumberOfPrintedErrors errors by EXPECT_NEAR.
- if (numErrors < kMaxNumberOfPrintedErrors) EXPECT_NEAR(expected, actual, tolerableRange);
- if (!(std::fabs(diff) <= tolerableRange)) numErrors++;
- }
- EXPECT_EQ(numErrors, 0u);
-
- // Test bias and MSE.
- if (len < numSkip + kMinNumberOfElementsToTestBiasMSE) return;
- bias /= static_cast<double>(len - numSkip);
- mse /= static_cast<double>(len - numSkip);
- EXPECT_LE(std::fabs(bias), criterion.bias);
- EXPECT_LE(mse, criterion.mse);
-}
-
-// For boolean values, we expect the number of mismatches does not exceed a certain ratio.
-void expectBooleanNearlyEqual(const RandomOperand& op, const OperandBuffer& test,
- float allowedErrorRatio) {
- const bool8* actual = reinterpret_cast<const bool8*>(test.data());
- const bool8* expected = reinterpret_cast<const bool8*>(op.buffer.data());
- uint32_t len = op.getNumberOfElements();
- uint32_t numErrors = 0;
- std::stringstream errorMsg;
- for (uint32_t i = 0; i < len; i++) {
- if (expected[i] != actual[i]) {
- if (numErrors < kMaxNumberOfPrintedErrors)
- errorMsg << " Expected: " << expected[i] << ", actual: " << actual[i]
- << ", when comparing element " << i << "\n";
- numErrors++;
- }
- }
- // When |len| is small, the allowedErrorCount will intentionally ceil at 1, which allows for
- // greater tolerance.
- uint32_t allowedErrorCount = static_cast<uint32_t>(std::ceil(allowedErrorRatio * len));
- EXPECT_LE(numErrors, allowedErrorCount) << errorMsg.str();
-}
-
-// TODO(b/139442221): Reduce code duplication with
-// nn/tools/test_generator/test_harness/TestHarness.cpp.
-void RandomGraph::checkResults(const std::vector<OperandBuffer>& buffers,
- const AccuracyCriteria& criteria) const {
- NN_FUZZER_LOG << "Check Results";
- // Make sure to keep the same order as the buffers are created.
- int i = 0;
- for (const auto& op : mOperands) {
- if (op->type == RandomOperandType::OUTPUT) {
- SCOPED_TRACE(testing::Message()
- << "When comparing output " << op->ioIndex << " (op" << op->opIndex << ")"
- << " of type " << toString(op->dataType));
- if (!op->doNotCheckAccuracy) {
- switch (op->dataType) {
- case Type::TENSOR_FLOAT32:
- expectNear<float>(*op, buffers[i], criteria.float32);
- break;
- case Type::TENSOR_FLOAT16:
- expectNear<_Float16>(*op, buffers[i], criteria.float16);
- break;
- case Type::TENSOR_INT32:
- expectNear<int32_t>(*op, buffers[i], criteria.int32);
- break;
- case Type::TENSOR_QUANT8_ASYMM:
- expectNear<uint8_t>(*op, buffers[i], criteria.quant8Asymm);
- break;
- case Type::TENSOR_QUANT8_SYMM:
- expectNear<int8_t>(*op, buffers[i], criteria.quant8Symm);
- break;
- case Type::TENSOR_QUANT16_ASYMM:
- expectNear<uint16_t>(*op, buffers[i], criteria.quant16Asymm);
- break;
- case Type::TENSOR_QUANT16_SYMM:
- expectNear<int16_t>(*op, buffers[i], criteria.quant16Symm);
- break;
- case Type::TENSOR_BOOL8:
- expectBooleanNearlyEqual(*op, buffers[i], /*allowedErrorRatio=*/0.01);
- break;
- default:
- NN_FUZZER_CHECK(false) << "Data type not supported.";
- }
- }
- i++;
- }
- }
+ return testModel;
}
void RandomGraph::dumpSpecFile(std::string filename, std::string testname = "") {
diff --git a/runtime/test/fuzzing/RandomGraphGenerator.h b/runtime/test/fuzzing/RandomGraphGenerator.h
index 47c6d3e..ffe177f 100644
--- a/runtime/test/fuzzing/RandomGraphGenerator.h
+++ b/runtime/test/fuzzing/RandomGraphGenerator.h
@@ -17,9 +17,11 @@
#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TEST_FUZZING_RANDOM_GRAPH_GENERATOR_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TEST_FUZZING_RANDOM_GRAPH_GENERATOR_H
+#include <memory>
#include <string>
#include <vector>
+#include "TestHarness.h"
#include "TestNeuralNetworksWrapper.h"
#include "fuzzing/RandomVariable.h"
@@ -108,36 +110,6 @@
RandomOperation(const OperationSignature& operation);
};
-struct AccuracyCriterion {
- // We expect the driver results to be unbiased.
- // Formula: abs(sum_{i}(diff) / sum(1)) <= bias, where
- // * fixed point: diff = actual - expected
- // * floating point: diff = (actual - expected) / max(1, abs(expected))
- float bias = std::numeric_limits<float>::max();
-
- // Set the threshold on Mean Square Error (MSE).
- // Formula: sum_{i}(diff ^ 2) / sum(1) <= mse
- float mse = std::numeric_limits<float>::max();
-
- // We also set accuracy thresholds on each element to detect any particular edge cases that may
- // be shadowed in bias or MSE. We use the similar approach as our CTS unit tests, but with much
- // relaxed criterion.
- // Formula: abs(actual - expected) <= atol + rtol * abs(expected)
- // where atol stands for Absolute TOLerance and rtol for Relative TOLerance.
- float atol = 0.0f;
- float rtol = 0.0f;
-};
-
-struct AccuracyCriteria {
- AccuracyCriterion float32;
- AccuracyCriterion float16;
- AccuracyCriterion int32;
- AccuracyCriterion quant8Asymm;
- AccuracyCriterion quant8Symm;
- AccuracyCriterion quant16Asymm;
- AccuracyCriterion quant16Symm;
-};
-
// The main interface of the random graph generator.
class RandomGraph {
public:
@@ -146,18 +118,9 @@
// Generate a random graph with numOperations and dimensionRange from a seed.
bool generate(uint32_t seed, uint32_t numOperations, uint32_t dimensionRange);
- // Create a NDK model from the random graph.
- void createModel(test_wrapper::Model* model);
-
- // Set the input/output buffers to an NDK execution object. The input buffer resides in
- // RandomOperand.buffer, the output buffer is either provided by "buffers" argument, or set
- // buffers to nullptr to use RandomOperand.buffer to record reference result.
- void createRequest(test_wrapper::Execution* execution,
- std::vector<OperandBuffer>* buffers = nullptr);
-
- // Check if the results in buffers meet the given accuracy criteria.
- void checkResults(const std::vector<OperandBuffer>& buffers,
- const AccuracyCriteria& criteria) const;
+ // Create a test model of the generated graph. The operands will always have fully-specified
+ // dimensions. The output buffers are only allocated but not initialized.
+ test_helper::TestModel createTestModel();
// Dump the generated random graph to a spec file for debugging and visualization purpose.
void dumpSpecFile(std::string filename, std::string testname);
diff --git a/runtime/test/fuzzing/TestRandomGraph.cpp b/runtime/test/fuzzing/TestRandomGraph.cpp
index 98fd3a5..32ebd75 100644
--- a/runtime/test/fuzzing/TestRandomGraph.cpp
+++ b/runtime/test/fuzzing/TestRandomGraph.cpp
@@ -22,6 +22,8 @@
#include <set>
#include <string>
+#include "GeneratedTestUtils.h"
+#include "TestHarness.h"
#include "TestNeuralNetworksWrapper.h"
#include "fuzzing/OperationManager.h"
#include "fuzzing/RandomGraphGenerator.h"
@@ -46,6 +48,7 @@
namespace nn {
namespace fuzzing_test {
+using namespace test_helper;
using test_wrapper::Result;
constexpr char kRefDeviceName[] = "nnapi-reference";
@@ -199,11 +202,10 @@
};
if (kDisabledTests.find(mTestName) != kDisabledTests.end()) return true;
if (featureLevel >= __ANDROID_API_Q__) return false;
- const auto& operations = mGraph.getOperations();
- for (const auto& op : operations) {
+ for (const auto& op : mTestModel.main.operations) {
// Skip if testing BATCH_TO_SPACE_ND with batch dimension == 1.
- if (op.opType == ANEURALNETWORKS_BATCH_TO_SPACE_ND &&
- op.inputs[0]->dimensions[0].getValue() == 1)
+ if (op.type == TestOperationType::BATCH_TO_SPACE_ND &&
+ mTestModel.main.operands[op.inputs[0]].dimensions[0] == 1)
return true;
}
return false;
@@ -255,12 +257,8 @@
// Create request.
test_wrapper::Execution execution(&compilation);
- std::vector<OperandBuffer> outputs;
- if (isRef) {
- mGraph.createRequest(&execution);
- } else {
- mGraph.createRequest(&execution, &outputs);
- }
+ std::vector<TestBuffer> outputs;
+ generated_tests::createRequest(mTestModel, &execution, &outputs);
// Compute result.
Result executeReturn = execution.compute();
@@ -273,14 +271,23 @@
return;
}
ASSERT_EQ(executeReturn, Result::NO_ERROR);
+
+ // Record the execution results as golden values.
+ if (isRef) {
+ for (uint32_t i = 0; i < outputs.size(); i++) {
+ auto outputIndex = mTestModel.main.outputIndexes[i];
+ mTestModel.main.operands[outputIndex].data = outputs[i];
+ }
+ }
+
if (featureLevel >= __ANDROID_API_Q__ && !isRef) {
- mGraph.checkResults(outputs, mCriteria);
+ checkResults(mTestModel, outputs, mCriteria);
}
}
// Compile and execute the generated graph normally (i.e., allow runtime to
// distribute across devices).
- void computeAndVerifyResults(const test_wrapper::Model* model, bool checkResults) {
+ void computeAndVerifyResults(const test_wrapper::Model* model, bool shouldCheckResults) {
// Because we're not using the introspection/control API, the CpuDevice
// is available as a fallback, and hence we assume that compilation and
// execution will succeed.
@@ -291,13 +298,13 @@
// Create request.
test_wrapper::Execution execution(&compilation);
- std::vector<OperandBuffer> outputs;
- mGraph.createRequest(&execution, &outputs);
+ std::vector<TestBuffer> outputs;
+ generated_tests::createRequest(mTestModel, &execution, &outputs);
// Compute and verify result.
ASSERT_EQ(execution.compute(), Result::NO_ERROR);
- if (checkResults) {
- mGraph.checkResults(outputs, mCriteria);
+ if (shouldCheckResults) {
+ checkResults(mTestModel, outputs, mCriteria);
}
}
@@ -307,8 +314,10 @@
ASSERT_TRUE(mGraph.generate(kSeed, numOperations, dimensionRange));
// Create a model from the random graph.
- test_wrapper::Model model;
- mGraph.createModel(&model);
+ mTestModel = mGraph.createTestModel();
+
+ generated_tests::GeneratedModel model;
+ generated_tests::createModel(mTestModel, &model);
ASSERT_TRUE(model.isValid());
ASSERT_EQ(model.finish(), Result::NO_ERROR);
@@ -358,6 +367,7 @@
const uint32_t kSeed = GetParam();
std::string mTestName;
RandomGraph mGraph;
+ TestModel mTestModel;
AccuracyCriteria mCriteria;
static int64_t mStandardDevicesFeatureLevel; // minimum across all devices
diff --git a/tools/test_generator/test_harness/TestHarness.cpp b/tools/test_generator/test_harness/TestHarness.cpp
index 98ab524..279973f 100644
--- a/tools/test_generator/test_harness/TestHarness.cpp
+++ b/tools/test_generator/test_harness/TestHarness.cpp
@@ -23,6 +23,8 @@
#include <algorithm>
#include <cmath>
#include <functional>
+#include <limits>
+#include <map>
#include <numeric>
#include <string>
#include <vector>
@@ -31,6 +33,9 @@
namespace {
+template <typename T>
+constexpr bool nnIsFloat = std::is_floating_point_v<T> || std::is_same_v<T, _Float16>;
+
constexpr uint32_t kMaxNumberOfPrintedErrors = 10;
// TODO(b/139442217): Allow passing accuracy criteria from spec.
@@ -59,15 +64,32 @@
// Check if the actual results meet the accuracy criterion.
template <typename T>
-void expectNear(const TestOperand& op, const TestBuffer& result, double atol, double rtol) {
+void expectNear(const TestOperand& op, const TestBuffer& result,
+ const AccuracyCriterion& criterion) {
+ constexpr uint32_t kMinNumberOfElementsToTestBiasMSE = 10;
const T* actualBuffer = result.get<T>();
const T* expectedBuffer = op.data.get<T>();
- uint32_t len = getNumberOfElements(op), numErrors = 0;
+ uint32_t len = getNumberOfElements(op), numErrors = 0, numSkip = 0;
+ double bias = 0.0f, mse = 0.0f;
for (uint32_t i = 0; i < len; i++) {
// Compare all data types in double for precision and signed arithmetic.
double actual = static_cast<double>(actualBuffer[i]);
double expected = static_cast<double>(expectedBuffer[i]);
- double tolerableRange = atol + rtol * std::fabs(expected);
+ double tolerableRange = criterion.atol + criterion.rtol * std::fabs(expected);
+
+ // Skip invalid floating point values.
+ if (std::isnan(expected) || std::isinf(expected) || std::fabs(expected) > 1e3) {
+ numSkip++;
+ continue;
+ }
+
+ // Accumulate bias and MSE. Use relative bias and MSE for floating point values.
+ double diff = actual - expected;
+ if constexpr (nnIsFloat<T>) {
+ diff /= std::max(1.0, std::abs(expected));
+ }
+ bias += diff;
+ mse += diff * diff;
// Print at most kMaxNumberOfPrintedErrors errors by EXPECT_NEAR.
if (numErrors < kMaxNumberOfPrintedErrors) {
@@ -76,24 +98,34 @@
if (std::fabs(actual - expected) > tolerableRange) numErrors++;
}
EXPECT_EQ(numErrors, 0u);
+
+ // Test bias and MSE.
+ if (len < numSkip + kMinNumberOfElementsToTestBiasMSE) return;
+ bias /= static_cast<double>(len - numSkip);
+ mse /= static_cast<double>(len - numSkip);
+ EXPECT_LE(std::fabs(bias), criterion.bias);
+ EXPECT_LE(mse, criterion.mse);
}
-// For boolean values, we expect exact match.
-void expectBooleanEqual(const TestOperand& op, const TestBuffer& result) {
+// For boolean values, we expect the number of mismatches does not exceed a certain ratio.
+void expectBooleanNearlyEqual(const TestOperand& op, const TestBuffer& result,
+ float allowedErrorRatio) {
const bool8* actualBuffer = result.get<bool8>();
const bool8* expectedBuffer = op.data.get<bool8>();
uint32_t len = getNumberOfElements(op), numErrors = 0;
+ std::stringstream errorMsg;
for (uint32_t i = 0; i < len; i++) {
- bool actual = static_cast<bool>(actualBuffer[i]);
- bool expected = static_cast<bool>(expectedBuffer[i]);
-
- // Print at most kMaxNumberOfPrintedErrors errors by EXPECT_NEAR.
- if (numErrors < kMaxNumberOfPrintedErrors) {
- EXPECT_EQ(expected, actual) << "When comparing element " << i;
+ if (expectedBuffer[i] != actualBuffer[i]) {
+ if (numErrors < kMaxNumberOfPrintedErrors)
+ errorMsg << " Expected: " << expectedBuffer[i] << ", actual: " << actualBuffer[i]
+ << ", when comparing element " << i << "\n";
+ numErrors++;
}
- if (expected != actual) numErrors++;
}
- EXPECT_EQ(numErrors, 0u);
+ // When |len| is small, the allowedErrorCount will intentionally ceil at 1, which allows for
+ // greater tolerance.
+ uint32_t allowedErrorCount = static_cast<uint32_t>(std::ceil(allowedErrorRatio * len));
+ EXPECT_LE(numErrors, allowedErrorCount) << errorMsg.str();
}
// Calculates the expected probability from the unnormalized log-probability of
@@ -157,6 +189,50 @@
} // namespace
+void checkResults(const TestModel& model, const std::vector<TestBuffer>& buffers,
+ const AccuracyCriteria& criteria) {
+ ASSERT_EQ(model.main.outputIndexes.size(), buffers.size());
+ for (uint32_t i = 0; i < model.main.outputIndexes.size(); i++) {
+ SCOPED_TRACE(testing::Message() << "When comparing output " << i);
+ const auto& operand = model.main.operands[model.main.outputIndexes[i]];
+ const auto& result = buffers[i];
+ if (operand.isIgnored) continue;
+
+ switch (operand.type) {
+ case TestOperandType::TENSOR_FLOAT32:
+ expectNear<float>(operand, result, criteria.float32);
+ break;
+ case TestOperandType::TENSOR_FLOAT16:
+ expectNear<_Float16>(operand, result, criteria.float16);
+ break;
+ case TestOperandType::TENSOR_INT32:
+ case TestOperandType::INT32:
+ expectNear<int32_t>(operand, result, criteria.int32);
+ break;
+ case TestOperandType::TENSOR_QUANT8_ASYMM:
+ expectNear<uint8_t>(operand, result, criteria.quant8Asymm);
+ break;
+ case TestOperandType::TENSOR_QUANT8_SYMM:
+ expectNear<int8_t>(operand, result, criteria.quant8Symm);
+ break;
+ case TestOperandType::TENSOR_QUANT16_ASYMM:
+ expectNear<uint16_t>(operand, result, criteria.quant16Asymm);
+ break;
+ case TestOperandType::TENSOR_QUANT16_SYMM:
+ expectNear<int16_t>(operand, result, criteria.quant16Symm);
+ break;
+ case TestOperandType::TENSOR_BOOL8:
+ expectBooleanNearlyEqual(operand, result, criteria.bool8AllowedErrorRatio);
+ break;
+ case TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+ expectNear<int8_t>(operand, result, criteria.quant8AsymmSigned);
+ break;
+ default:
+ FAIL() << "Data type not supported.";
+ }
+ }
+}
+
void checkResults(const TestModel& model, const std::vector<TestBuffer>& buffers) {
// For RANDOM_MULTINOMIAL test only.
if (model.expectedMultinomialDistributionTolerance > 0.0f) {
@@ -164,13 +240,29 @@
return;
}
- // Decide the tolerable range.
+ // Decide the default tolerable range.
//
// For floating-point models, we use the relaxed precision if either
// - relaxed computation flag is set
// - the model has at least one TENSOR_FLOAT16 operand
- double fpAtol = 1e-5;
- double fpRtol = 5.0f * 1.1920928955078125e-7;
+ //
+ // The bias and MSE criteria are implicitly set to the maximum -- we do not enforce these
+ // criteria in normal generated tests.
+ //
+ // TODO: Adjust the error limit based on testing.
+ //
+ AccuracyCriteria criteria = {
+ // The relative tolerance is 5ULP of FP32.
+ .float32 = {.atol = 1e-5, .rtol = 5.0f * 1.1920928955078125e-7},
+ // Both the absolute and relative tolerance are 5ULP of FP16.
+ .float16 = {.atol = 5.0f * 0.0009765625, .rtol = 5.0f * 0.0009765625},
+ .int32 = {.atol = 1},
+ .quant8Asymm = {.atol = 1},
+ .quant8Symm = {.atol = 1},
+ .quant16Asymm = {.atol = 1},
+ .quant16Symm = {.atol = 1},
+ .bool8AllowedErrorRatio = 0.0f,
+ };
bool hasFloat16Inputs = false;
model.forEachSubgraph([&hasFloat16Inputs](const TestSubgraph& subgraph) {
if (!hasFloat16Inputs) {
@@ -181,56 +273,14 @@
}
});
if (model.isRelaxed || hasFloat16Inputs) {
- // TODO: Adjust the error limit based on testing.
- // If in relaxed mode, set the absolute tolerance to be 5ULP of FP16.
- fpAtol = 5.0f * 0.0009765625;
- // Set the relative tolerance to be 5ULP of the corresponding FP precision.
- fpRtol = 5.0f * 0.0009765625;
+ criteria.float32 = criteria.float16;
}
const double quant8AllowedError = getQuant8AllowedError();
+ criteria.quant8Asymm.atol = quant8AllowedError;
+ criteria.quant8AsymmSigned.atol = quant8AllowedError;
+ criteria.quant8Symm.atol = quant8AllowedError;
- ASSERT_EQ(model.main.outputIndexes.size(), buffers.size());
- for (uint32_t i = 0; i < model.main.outputIndexes.size(); i++) {
- SCOPED_TRACE(testing::Message() << "When comparing output " << i);
- const auto& operand = model.main.operands[model.main.outputIndexes[i]];
- const auto& result = buffers[i];
- if (operand.isIgnored) continue;
-
- switch (operand.type) {
- case TestOperandType::TENSOR_FLOAT32:
- expectNear<float>(operand, result, fpAtol, fpRtol);
- break;
- case TestOperandType::TENSOR_FLOAT16:
- expectNear<_Float16>(operand, result, fpAtol, fpRtol);
- break;
- case TestOperandType::TENSOR_INT32:
- expectNear<int32_t>(operand, result, 0, 0);
- break;
- case TestOperandType::TENSOR_QUANT8_ASYMM:
- expectNear<uint8_t>(operand, result, quant8AllowedError, 0);
- break;
- case TestOperandType::TENSOR_QUANT8_SYMM:
- expectNear<int8_t>(operand, result, quant8AllowedError, 0);
- break;
- case TestOperandType::TENSOR_QUANT16_ASYMM:
- expectNear<uint16_t>(operand, result, 1, 0);
- break;
- case TestOperandType::TENSOR_QUANT16_SYMM:
- expectNear<int16_t>(operand, result, 1, 0);
- break;
- case TestOperandType::TENSOR_BOOL8:
- expectBooleanEqual(operand, result);
- break;
- case TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED:
- expectNear<int8_t>(operand, result, quant8AllowedError, 0);
- break;
- case TestOperandType::INT32:
- expectNear<int32_t>(operand, result, 0, 0);
- break;
- default:
- FAIL() << "Data type not supported.";
- }
- }
+ checkResults(model, buffers, criteria);
}
TestModel convertQuant8AsymmOperandsToSigned(const TestModel& testModel) {
diff --git a/tools/test_generator/test_harness/include/TestHarness.h b/tools/test_generator/test_harness/include/TestHarness.h
index bd6f1d2..cc8c5b0 100644
--- a/tools/test_generator/test_harness/include/TestHarness.h
+++ b/tools/test_generator/test_harness/include/TestHarness.h
@@ -27,6 +27,7 @@
#include <cstdlib>
#include <cstring>
#include <functional>
+#include <limits>
#include <map>
#include <memory>
#include <random>
@@ -454,10 +455,44 @@
std::map<std::string, const TestModel*> mTestModels;
};
+struct AccuracyCriterion {
+ // We expect the driver results to be unbiased.
+ // Formula: abs(sum_{i}(diff) / sum(1)) <= bias, where
+ // * fixed point: diff = actual - expected
+ // * floating point: diff = (actual - expected) / max(1, abs(expected))
+ float bias = std::numeric_limits<float>::max();
+
+ // Set the threshold on Mean Square Error (MSE).
+ // Formula: sum_{i}(diff ^ 2) / sum(1) <= mse
+ float mse = std::numeric_limits<float>::max();
+
+ // We also set accuracy thresholds on each element to detect any particular edge cases that may
+ // be shadowed in bias or MSE. We use the similar approach as our CTS unit tests, but with much
+ // relaxed criterion.
+ // Formula: abs(actual - expected) <= atol + rtol * abs(expected)
+ // where atol stands for Absolute TOLerance and rtol for Relative TOLerance.
+ float atol = 0.0f;
+ float rtol = 0.0f;
+};
+
+struct AccuracyCriteria {
+ AccuracyCriterion float32;
+ AccuracyCriterion float16;
+ AccuracyCriterion int32;
+ AccuracyCriterion quant8Asymm;
+ AccuracyCriterion quant8AsymmSigned;
+ AccuracyCriterion quant8Symm;
+ AccuracyCriterion quant16Asymm;
+ AccuracyCriterion quant16Symm;
+ float bool8AllowedErrorRatio = 0.1f;
+};
+
// Check the output results against the expected values in test model by calling
// GTEST_ASSERT/EXPECT. The index of the results corresponds to the index in
// model.main.outputIndexes. E.g., results[i] corresponds to model.main.outputIndexes[i].
void checkResults(const TestModel& model, const std::vector<TestBuffer>& results);
+void checkResults(const TestModel& model, const std::vector<TestBuffer>& results,
+ const AccuracyCriteria& criteria);
TestModel convertQuant8AsymmOperandsToSigned(const TestModel& testModel);