[nnc] Allow 1 ulp tolerance in log approximation (#52165)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/52165

Apparently bit-for-bit identical results are too high a bar: I'm seeing
differences at that level depending on the HW platform (e.g., Broadwell
is bitwise accurate but Skylake is 1 ulp off). In any case, VML is
accurate to 1 ulp, so let's allow that much.
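
For context, a 1 ulp tolerance means accepting any result that is within one
representable float of the reference value; the test below approximates this
with torch::allclose and rtol = FLT_EPSILON. A minimal standalone sketch of
that kind of check (within_one_ulp is a hypothetical helper, not part of this
patch):

    // Sketch only: treat two floats as equal if they differ by at most 1 ulp,
    // which for normal values is roughly |a - b| <= FLT_EPSILON * |b|.
    #include <cassert>
    #include <cmath>
    #include <limits>

    // Hypothetical helper: true if a is within one ulp of the reference b.
    static bool within_one_ulp(float a, float b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::isnan(a) && std::isnan(b); // mirrors equal_nan=true
      }
      float lo = std::nextafter(b, -std::numeric_limits<float>::infinity());
      float hi = std::nextafter(b, std::numeric_limits<float>::infinity());
      return a >= lo && a <= hi;
    }

    int main() {
      float ref = std::log(1.5f);
      float one_ulp_off =
          std::nextafter(ref, std::numeric_limits<float>::infinity());
      assert(within_one_ulp(one_ulp_off, ref));  // accepted under the new tolerance
      assert(!within_one_ulp(ref + 1e-5f, ref)); // far more than 1 ulp: rejected
      return 0;
    }
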
ghstack-source-id: 121815001

Test Plan: test_approx

Reviewed By: asuhan

Differential Revision: D26408079

fbshipit-source-id: 46cbd1487c72ae7bc40567f2f72ed2b919707d0d
diff --git a/test/cpp/tensorexpr/test_approx.cpp b/test/cpp/tensorexpr/test_approx.cpp
index 09d0cf6..5a56771 100644
--- a/test/cpp/tensorexpr/test_approx.cpp
+++ b/test/cpp/tensorexpr/test_approx.cpp
@@ -8,6 +8,7 @@
 #include <torch/torch.h>
 #include <cstring>
 
+using namespace torch::indexing;
 namespace te = torch::jit::tensorexpr;
 
 static void vectorize(te::LoopNest* ln, te::Tensor* target, int width) {
@@ -17,6 +18,17 @@
   ln->vectorize(inner);
 }
 
+std::string diffs(const at::Tensor& a, const at::Tensor& b) {
+  auto diff = torch::abs(a.flatten() - b.flatten());
+  auto count_diffs = torch::sum(diff > 0.f);
+  auto greatest_diff_index = torch::argmax(diff);
+  std::stringstream ss;
+  ss << "Found " << count_diffs << " unequal element(s). "
+     << "The greatest difference was " << diff.index({greatest_diff_index})
+     << " at index " << greatest_diff_index;
+  return ss.str();
+}
+
 TEST(Approx, log_vml) {
   te::KernelScope ks;
   te::VarHandle N("N", te::kInt);
@@ -31,19 +43,25 @@
   s = te::IRSimplifier::simplify(s);
   te::LLVMCodeGen cg(s, {A, B, N});
 
+  auto eps = std::numeric_limits<float>::epsilon();
   auto test = [&](const at::Tensor& A_t) {
     at::Tensor B_ref = at::log(A_t);
     at::Tensor B_t = at::empty_like(A_t);
-    cg.call({A_t.data_ptr<float>(), B_t.data_ptr<float>(), A_t.numel()});
+    auto ap = A_t.data_ptr<float>();
+    auto bp = B_t.data_ptr<float>();
+    cg.call({ap, bp, A_t.numel()});
-    // Results should be bit-identical.
+    // Results should match the reference to within 1 ulp (rtol = eps).
-    ASSERT_TRUE(
-        memcmp(
-            B_ref.data_ptr<float>(), B_t.data_ptr<float>(), B_ref.nbytes()) ==
-        0);
+    ASSERT_TRUE(torch::allclose(
+        B_t, B_ref, /*rtol=*/eps, /*atol=*/0.0f, /*equal_nan=*/true))
+        << "Input[:8]\n"
+        << A_t.index({Slice(0, 8)}) << "\n"
+        << "Test[:8]\n"
+        << B_t.index({Slice(0, 8)}) << "\n"
+        << "Ref[:8]\n"
+        << B_ref.index({Slice(0, 8)}) << diffs(B_t, B_ref);
   };
 
   // Generate every single-precision FP value in [1.0, 2.0).
-  auto eps = std::numeric_limits<float>::epsilon();
   at::Tensor A_t = torch::arange(1.0f, 2.0f, eps);
   ASSERT_EQ(A_t.numel(), 1 << 23);