[nnc] Allow 1 ulp tolerance in log approximation (#52165)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/52165
Apparently bitwise identity is too high a bar (I'm seeing
differences at this level depending on the HW platform, e.g.,
Broadwell is bitwise accurate but Skylake is 1 ulp off). VML is
only accurate to 1 ulp anyway, so let's allow that tolerance.
ghstack-source-id: 121815001
Test Plan: test_approx
Reviewed By: asuhan
Differential Revision: D26408079
fbshipit-source-id: 46cbd1487c72ae7bc40567f2f72ed2b919707d0d
diff --git a/test/cpp/tensorexpr/test_approx.cpp b/test/cpp/tensorexpr/test_approx.cpp
index 09d0cf6..5a56771 100644
--- a/test/cpp/tensorexpr/test_approx.cpp
+++ b/test/cpp/tensorexpr/test_approx.cpp
@@ -8,6 +8,7 @@
#include <torch/torch.h>
#include <cstring>
+using namespace torch::indexing;
namespace te = torch::jit::tensorexpr;
static void vectorize(te::LoopNest* ln, te::Tensor* target, int width) {
@@ -17,6 +18,17 @@
ln->vectorize(inner);
}
+std::string diffs(const at::Tensor& a, const at::Tensor& b) { // Summarizes how a and b differ; streamed into the assertion failure message below.
+ auto diff = torch::abs(a.flatten() - b.flatten()); // Element-wise absolute difference over the flattened tensors.
+ auto count_diffs = torch::sum(diff > 0.f); // Number of elements with a strictly positive difference.
+ auto greatest_diff_index = torch::argmax(diff); // Flattened index of the largest difference.
+ std::stringstream ss;
+ ss << "Found " << count_diffs << " unequal element(s). "
+ << "The greatest difference was " << diff.index({greatest_diff_index})
+ << " at index " << greatest_diff_index;
+ return ss.str();
+}
+
TEST(Approx, log_vml) {
te::KernelScope ks;
te::VarHandle N("N", te::kInt);
@@ -31,19 +43,25 @@
s = te::IRSimplifier::simplify(s);
te::LLVMCodeGen cg(s, {A, B, N});
+ auto eps = std::numeric_limits<float>::epsilon();
auto test = [&](const at::Tensor& A_t) {
at::Tensor B_ref = at::log(A_t);
at::Tensor B_t = at::empty_like(A_t);
- cg.call({A_t.data_ptr<float>(), B_t.data_ptr<float>(), A_t.numel()});
+ auto ap = A_t.data_ptr<float>();
+ auto bp = B_t.data_ptr<float>();
+ cg.call({ap, bp, A_t.numel()});
- // Results should be bit-identical.
+ // Results should match the reference to within 1 ulp.
- ASSERT_TRUE(
- memcmp(
- B_ref.data_ptr<float>(), B_t.data_ptr<float>(), B_ref.nbytes()) ==
- 0);
+ ASSERT_TRUE(torch::allclose(
+ B_t, B_ref, /*rtol=*/eps, /*atol=*/0.0f, /*equal_nan=*/true))
+ << "Input[:8]\n"
+ << A_t.index({Slice(0, 8)}) << "\n"
+ << "Test[:8]\n"
+ << B_t.index({Slice(0, 8)}) << "\n"
+ << "Ref[:8]\n"
+ << B_ref.index({Slice(0, 8)}) << diffs(B_t, B_ref);
};
// Generate every single-precision FP value in [1.0, 2.0).
- auto eps = std::numeric_limits<float>::epsilon();
at::Tensor A_t = torch::arange(1.0f, 2.0f, eps);
ASSERT_EQ(A_t.numel(), 1 << 23);