[AOTI] Add ABI-compatibility tests (#123848) Summary: In AOTInductor generated CPU model code, there can be direct references to some aten/c10 utility functions and data structures, e.g. at::vec and c10::Half. These are performance critical and thus it doesn't make sense to create C shims for them. Instead, we make sure they are implemented in a header-only way, and use this set of tests to guard future changes. There are more header files to be updated, but we will do it in other follow-up PRs. Pull Request resolved: https://github.com/pytorch/pytorch/pull/123848 Approved by: https://github.com/jansel ghstack dependencies: #123847

commit: 4946638f06e5916ea9bd0f790ff620bdb78a92a3 [log] [tgz]
author: Bin Bao <binbao@meta.com> Thu Apr 11 19:55:18 2024 -0700
committer: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com> Fri Apr 19 00:51:24 2024 +0000
tree: 1eb44b6f254a0795d77de1c8ea9e28f497bea351
parent: cbefaf2a37b5f0659395fea3e1301a4e36a40013 [diff]
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index 5408e0f..23eaf8a 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh

@@ -334,7 +334,7 @@
   # TODO: need a faster way to build
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
       BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-      CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
+      CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
   fi
 }
 

diff --git a/.github/labeler.yml b/.github/labeler.yml
index 74ea0ab..8a572bf 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml

@@ -36,7 +36,8 @@
 - torch/distributed/_tensor/**
 - torch/distributed/fsdp/**
 - torch/csrc/inductor/**
-- test/cpp/aot_inductor/**
+- test/cpp/aoti_abi_check/**
+- test/cpp/aoti_inference/**
 
 "module: cpu":
 - aten/src/ATen/cpu/**

diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index d080ef6..83bf158 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt

@@ -1336,8 +1336,11 @@
   endif()
   if(BUILD_AOT_INDUCTOR_TEST)
     add_subdirectory(
-      ${TORCH_ROOT}/test/cpp/aot_inductor
-      ${CMAKE_BINARY_DIR}/test_aot_inductor)
+      ${TORCH_ROOT}/test/cpp/aoti_abi_check
+      ${CMAKE_BINARY_DIR}/test_aoti_abi_check)
+    add_subdirectory(
+      ${TORCH_ROOT}/test/cpp/aoti_inference
+      ${CMAKE_BINARY_DIR}/test_aoti_inference)
   endif()
 endif()
 

diff --git a/test/cpp/aoti_abi_check/CMakeLists.txt b/test/cpp/aoti_abi_check/CMakeLists.txt
new file mode 100644
index 0000000..8ae688e
--- /dev/null
+++ b/test/cpp/aoti_abi_check/CMakeLists.txt

@@ -0,0 +1,27 @@
+set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
+
+# Build the cpp gtest binary containing the cpp-only tests.
+set(AOTI_ABI_CHECK_TEST_SRCS
+  ${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
+  ${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
+)
+
+add_executable(test_aoti_abi_check
+  ${AOTI_ABI_CHECK_TEST_SRCS}
+)
+
+# TODO temporary until we can delete the old gtest polyfills.
+target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
+
+# WARNING: DO NOT LINK torch!!!
+# The purpose is to check if the used aten/c10 headers are written in a header-only way
+target_link_libraries(test_aoti_abi_check PRIVATE gtest)
+target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})
+
+if(INSTALL_TEST)
+  install(TARGETS test_aoti_abi_check DESTINATION bin)
+  # Install PDB files for MSVC builds
+  if(MSVC AND BUILD_SHARED_LIBS)
+    install(FILES $<TARGET_PDB_FILE:test_aoti_abi_check> DESTINATION bin OPTIONAL)
+  endif()
+endif()

diff --git a/test/cpp/aoti_abi_check/README.md b/test/cpp/aoti_abi_check/README.md
new file mode 100644
index 0000000..7a35838
--- /dev/null
+++ b/test/cpp/aoti_abi_check/README.md

@@ -0,0 +1 @@
+Tests in this directory guard that certain ATen/c10 utility functions and data structures remain implemented in a header-only fashion, so that AOTInductor-generated CPU model code stays ABI backward-compatible.

diff --git a/test/cpp/aoti_abi_check/main.cpp b/test/cpp/aoti_abi_check/main.cpp
new file mode 100644
index 0000000..d810713
--- /dev/null
+++ b/test/cpp/aoti_abi_check/main.cpp

@@ -0,0 +1,6 @@
+#include <gtest/gtest.h>
+
+int main(int argc, char* argv[]) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}

diff --git a/test/cpp/aoti_abi_check/test_dtype.cpp b/test/cpp/aoti_abi_check/test_dtype.cpp
new file mode 100644
index 0000000..bf00815
--- /dev/null
+++ b/test/cpp/aoti_abi_check/test_dtype.cpp

@@ -0,0 +1,99 @@
+#include <gtest/gtest.h>
+
+#include <c10/util/BFloat16-math.h>
+#include <c10/util/BFloat16.h>
+#include <c10/util/Float8_e4m3fn.h>
+#include <c10/util/Float8_e4m3fnuz.h>
+#include <c10/util/Float8_e5m2.h>
+#include <c10/util/Float8_e5m2fnuz.h>
+#include <c10/util/Half.h>
+
+namespace torch {
+namespace aot_inductor {
+
+TEST(TestDtype, TestBFloat16) {
+  c10::BFloat16 a = 1.0f;
+  c10::BFloat16 b = 2.0f;
+  c10::BFloat16 add = 3.0f;
+  c10::BFloat16 sub = -1.0f;
+  c10::BFloat16 mul = 2.0f;
+  c10::BFloat16 div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e4m3fn) {
+  c10::Float8_e4m3fn a = 1.0f;
+  c10::Float8_e4m3fn b = 2.0f;
+  c10::Float8_e4m3fn add = 3.0f;
+  c10::Float8_e4m3fn sub = -1.0f;
+  c10::Float8_e4m3fn mul = 2.0f;
+  c10::Float8_e4m3fn div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+// Arithmetic smoke test for c10::Float8_e4m3fnuz: checks +, -, * and / on 1.0 and 2.0.
+TEST(TestDtype, TestFloat8_e4m3fnuz) {
+  c10::Float8_e4m3fnuz a = 1.0f;
+  c10::Float8_e4m3fnuz b = 2.0f;
+  c10::Float8_e4m3fnuz add = 3.0f;
+  c10::Float8_e4m3fnuz sub = -1.0f;
+  c10::Float8_e4m3fnuz mul = 2.0f;
+  c10::Float8_e4m3fnuz div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e5m2) {
+  c10::Float8_e5m2 a = 1.0f;
+  c10::Float8_e5m2 b = 2.0f;
+  c10::Float8_e5m2 add = 3.0f;
+  c10::Float8_e5m2 sub = -1.0f;
+  c10::Float8_e5m2 mul = 2.0f;
+  c10::Float8_e5m2 div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e5m2fnuz) {
+  c10::Float8_e5m2fnuz a = 1.0f;
+  c10::Float8_e5m2fnuz b = 2.0f;
+  c10::Float8_e5m2fnuz add = 3.0f;
+  c10::Float8_e5m2fnuz sub = -1.0f;
+  c10::Float8_e5m2fnuz mul = 2.0f;
+  c10::Float8_e5m2fnuz div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestHalf) {
+  c10::Half a = 1.0f;
+  c10::Half b = 2.0f;
+  c10::Half add = 3.0f;
+  c10::Half sub = -1.0f;
+  c10::Half mul = 2.0f;
+  c10::Half div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+} // namespace aot_inductor
+} // namespace torch

diff --git a/test/cpp/aot_inductor/CMakeLists.txt b/test/cpp/aoti_inference/CMakeLists.txt
similarity index 73%
rename from test/cpp/aot_inductor/CMakeLists.txt
rename to test/cpp/aoti_inference/CMakeLists.txt
index 8ec065e..70c91fb 100644
--- a/test/cpp/aot_inductor/CMakeLists.txt
+++ b/test/cpp/aoti_inference/CMakeLists.txt

@@ -1,5 +1,5 @@
 
-set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aot_inductor)
+set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference)
 
 # Build custom TorchScript op for AOTInductor
 add_library(aoti_custom_class SHARED aoti_custom_class.cpp)
@@ -31,7 +31,7 @@
   ${AOT_INDUCTOR_TEST_ROOT}/test.cpp
 )
 
-add_executable(test_aot_inductor
+add_executable(test_aoti_inference
   ${TORCH_ROOT}/test/cpp/common/main.cpp
   ${INDUCTOR_TEST_SRCS}
   data.pt
@@ -39,10 +39,10 @@
   script_model_cpu.pt
   script_model_cuda.pt
 )
-add_dependencies(test_aot_inductor aoti_custom_class aoti_script_model)
+add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model)
 
 # TODO temporary until we can delete the old gtest polyfills.
-target_compile_definitions(test_aot_inductor PRIVATE USE_GTEST)
+target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST)
 
 # Define a custom command to generate the library
 add_custom_command(
@@ -51,24 +51,24 @@
         DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py
 )
 
-target_link_libraries(test_aot_inductor PRIVATE
+target_link_libraries(test_aoti_inference PRIVATE
   torch
   gtest
   -Wl,--no-as-needed aoti_custom_class
 )
 
 if(USE_CUDA)
-  target_include_directories(test_aot_inductor PRIVATE ${ATen_CUDA_INCLUDE})
-  target_compile_definitions(test_aot_inductor PRIVATE USE_CUDA)
+  target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE})
+  target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA)
 endif()
-target_compile_definitions(test_aot_inductor PRIVATE
+target_compile_definitions(test_aoti_inference PRIVATE
     CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
 )
 
 if(INSTALL_TEST)
-  install(TARGETS test_aot_inductor DESTINATION bin)
+  install(TARGETS test_aoti_inference DESTINATION bin)
   # Install PDB files for MSVC builds
   if(MSVC AND BUILD_SHARED_LIBS)
-    install(FILES $<TARGET_PDB_FILE:test_aot_inductor> DESTINATION bin OPTIONAL)
+    install(FILES $<TARGET_PDB_FILE:test_aoti_inference> DESTINATION bin OPTIONAL)
   endif()
 endif()

diff --git a/test/cpp/aot_inductor/aoti_custom_class.cpp b/test/cpp/aoti_inference/aoti_custom_class.cpp
similarity index 100%
rename from test/cpp/aot_inductor/aoti_custom_class.cpp
rename to test/cpp/aoti_inference/aoti_custom_class.cpp


diff --git a/test/cpp/aot_inductor/aoti_custom_class.h b/test/cpp/aoti_inference/aoti_custom_class.h
similarity index 100%
rename from test/cpp/aot_inductor/aoti_custom_class.h
rename to test/cpp/aoti_inference/aoti_custom_class.h


diff --git a/test/cpp/aot_inductor/compile_model.py b/test/cpp/aoti_inference/compile_model.py
similarity index 100%
rename from test/cpp/aot_inductor/compile_model.py
rename to test/cpp/aoti_inference/compile_model.py


diff --git a/test/cpp/aot_inductor/test.cpp b/test/cpp/aoti_inference/test.cpp
similarity index 99%
rename from test/cpp/aot_inductor/test.cpp
rename to test/cpp/aoti_inference/test.cpp
index bfb5e41..fde2a37 100644
--- a/test/cpp/aot_inductor/test.cpp
+++ b/test/cpp/aoti_inference/test.cpp

@@ -283,7 +283,7 @@
 } // namespace
 
 namespace torch {
-namespace inductor {
+namespace aot_inductor {
 
 TEST(AotInductorTest, BasicTestCpu) {
   test_aoti("cpu", false);
@@ -324,5 +324,5 @@
 }
 #endif
 
-} // namespace inductor
+} // namespace aot_inductor
 } // namespace torch

diff --git a/test/cpp/aot_inductor/test.py b/test/cpp/aoti_inference/test.py
similarity index 100%
rename from test/cpp/aot_inductor/test.py
rename to test/cpp/aoti_inference/test.py


diff --git a/test/cpp/common/main.cpp b/test/cpp/common/main.cpp
index 632aa8e..7feb2a8 100644
--- a/test/cpp/common/main.cpp
+++ b/test/cpp/common/main.cpp

@@ -18,6 +18,7 @@
 
 int main(int argc, char* argv[]) {
   ::testing::InitGoogleTest(&argc, argv);
+
   if (!torch::cuda::is_available()) {
     std::cout << "CUDA not available. Disabling CUDA and MultiCUDA tests"
               << std::endl;

diff --git a/torch/_inductor/codegen/cpp_prefix.h b/torch/_inductor/codegen/cpp_prefix.h
index 5afb619..a05a9e2 100644
--- a/torch/_inductor/codegen/cpp_prefix.h
+++ b/torch/_inductor/codegen/cpp_prefix.h

@@ -7,6 +7,12 @@
 #include <limits>
 #include <omp.h>
 
+// WARNING: be extra careful when including more ATen/c10 header files here!
+// Because AOTInductor generated code will copy-paste this cpp_prefix.h for
+// the CPU backend, we have to make sure the used headers are implemented
+// in a header-only way, i.e. all the function and class definitions are
+// in .h files instead of .cpp files, to avoid ABI backward-compatibility breakage.
+
 #include <ATen/NumericUtils.h>
 #include <ATen/core/PhiloxRNGEngine.h>
 #include <ATen/native/Math.h>
commit	4946638f06e5916ea9bd0f790ff620bdb78a92a3	[log] [tgz]
author	Bin Bao <binbao@meta.com>	Thu Apr 11 19:55:18 2024 -0700
committer	PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>	Fri Apr 19 00:51:24 2024 +0000
tree	1eb44b6f254a0795d77de1c8ea9e28f497bea351
parent	cbefaf2a37b5f0659395fea3e1301a4e36a40013 [diff]