# ---[ Generate and install header and cpp files
# See ../cmake/Codegen.cmake for the code-generation and install logic.
include(../cmake/Codegen.cmake)

# ---[ Vulkan code gen
# Only pulled in when building with Vulkan support.
if(USE_VULKAN)
  include(../cmake/VulkanCodegen.cmake)
endif()

# ---[ MSVC OpenMP modification
# NOTE(review): presumably included for its MSVC/OpenMP helper functions --
# confirm against cmake/public/utils.cmake.
if(MSVC)
  include(../cmake/public/utils.cmake)
endif()
| |
# Debug messages - if you want to get a list of source files and examine
# target information, enable the following by -DPRINT_CMAKE_DEBUG_INFO=ON.
# option() creates the same BOOL cache variable (default off) as the original
# set(... CACHE BOOL ...) form.
option(PRINT_CMAKE_DEBUG_INFO "print cmake debug information" OFF)
if(PRINT_CMAKE_DEBUG_INFO)
  include(../cmake/DebugHelper.cmake)
endif()
| |
# ATen parallelism settings
# OMP - OpenMP for intra-op, native thread pool for inter-op parallelism
# NATIVE - using native thread pool for intra- and inter-op parallelism
# TBB - using TBB for intra- and native thread pool for inter-op parallelism
# NOTE: ATEN_THREADING is a CACHE variable, so a user-supplied
# -DATEN_THREADING=... on the command line overrides the defaults below.
if(INTERN_BUILD_MOBILE)
  # Mobile builds never use OpenMP/TBB for ATen.
  set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
else()
  if(USE_OPENMP)
    set(ATEN_THREADING "OMP" CACHE STRING "ATen parallel backend")
  elseif(USE_TBB)
    set(ATEN_THREADING "TBB" CACHE STRING "ATen parallel backend")
  else()
    set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
  endif()
endif()
| |
# Default all parallel-backend selector flags off; exactly one of them is
# switched on below according to the ATEN_THREADING choice made above.
set(AT_PARALLEL_OPENMP 0)
set(AT_PARALLEL_NATIVE 0)
set(AT_PARALLEL_NATIVE_TBB 0)

message(STATUS "Using ATen parallel backend: ${ATEN_THREADING}")
if("${ATEN_THREADING}" STREQUAL "OMP")
  set(AT_PARALLEL_OPENMP 1)
elseif("${ATEN_THREADING}" STREQUAL "NATIVE")
  set(AT_PARALLEL_NATIVE 1)
elseif("${ATEN_THREADING}" STREQUAL "TBB")
  # TBB intra-op threading also requires the TBB dependency itself.
  if(NOT USE_TBB)
    message(FATAL_ERROR "Using TBB backend but USE_TBB is off")
  endif()
  # Fix: corrected typo ("deprectated") in the user-facing warning.
  message(WARNING "ATEN TBB Threading is deprecated.")
  set(AT_PARALLEL_NATIVE_TBB 1)
else()
  # Any other value is a configuration mistake; fail loudly.
  message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
endif()
| |
# ---[ Declare source file lists

# ---[ ATen build
if(INTERN_BUILD_ATEN_OPS)
  # ATen is always built as position-independent code; save and restore the
  # includer's PIC setting around the subdirectory.
  set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE})
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  add_subdirectory(../aten aten)
  set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})

  # Generate the headers wrapped by our operator
  # gen_op.py consumes the Declarations.yaml emitted by the ATen build above
  # and writes contrib/aten/aten_op.h into the binary tree.
  file(GLOB_RECURSE torchgen_python "${PROJECT_SOURCE_DIR}/torchgen/*.py")
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h
    COMMAND
    "${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
    --aten_root=${CMAKE_CURRENT_SOURCE_DIR}/../aten
    --template_dir=${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten
    --yaml_dir=${CMAKE_CURRENT_BINARY_DIR}/../aten/src/ATen
    --install_dir=${CMAKE_CURRENT_BINARY_DIR}/contrib/aten
    DEPENDS
    ${torchgen_python}
    ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
    ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/gen_op.py
    ${CMAKE_CURRENT_SOURCE_DIR}/contrib/aten/aten_op_template.h)

  # __aten_op_header_gen is the named target driving the header generation;
  # aten_op_header_gen is the INTERFACE library other targets can depend on.
  # Note: add_dependencies only orders the build, it does not link anything.
  add_custom_target(__aten_op_header_gen
    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/contrib/aten/aten_op.h)
  add_library(aten_op_header_gen INTERFACE)
  add_dependencies(aten_op_header_gen __aten_op_header_gen)

  # Add source, includes, and libs to lists
  list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS})
  list(APPEND Caffe2_GPU_SRCS ${ATen_CUDA_CPP_SRCS})
  list(APPEND Caffe2_GPU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY})
  list(APPEND Caffe2_GPU_CU_SRCS ${ATen_CUDA_CU_SRCS})
  list(APPEND Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
  list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS})
  list(APPEND Caffe2_MPS_SRCS ${ATen_MPS_SRCS})
  # NOTE(review): HIP folds the sort-by-key sources into the same list,
  # unlike CUDA which keeps separate *_W_SORT_BY_KEY lists -- presumably
  # intentional; confirm if touching the HIP build.
  list(APPEND Caffe2_HIP_SRCS ${ATen_HIP_SRCS_W_SORT_BY_KEY})
  list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS})
  list(APPEND Caffe2_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS})
  list(APPEND Caffe2_GPU_TEST_SRCS ${ATen_CUDA_TEST_SRCS})
  list(APPEND Caffe2_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS})
  list(APPEND Caffe2_CPU_TEST_SRCS ${ATen_CORE_TEST_SRCS})
  list(APPEND Caffe2_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS})
  list(APPEND Caffe2_CPU_INCLUDE ${ATen_CPU_INCLUDE})
  list(APPEND Caffe2_GPU_INCLUDE ${ATen_CUDA_INCLUDE})
  list(APPEND Caffe2_HIP_INCLUDE ${ATen_HIP_INCLUDE})
  list(APPEND Caffe2_VULKAN_INCLUDE ${ATen_VULKAN_INCLUDE})
  list(APPEND Caffe2_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS})
  list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS})
  list(APPEND Caffe2_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS})
  list(APPEND Caffe2_DEPENDENCY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE})
endif()
| |
# ---[ Caffe2 build
# Note: the folders that are being commented out have not been properly
# addressed yet.

# FXdiv is only pulled in for XNNPACK builds on non-MSVC toolchains, and only
# when no other part of the build has already defined the fxdiv target.
if(USE_XNNPACK AND NOT MSVC)
  if(NOT TARGET fxdiv)
    # Keep the FXdiv subproject minimal: no tests, no benchmarks.
    set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
    set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
    add_subdirectory("${FXDIV_SOURCE_DIR}" "${CMAKE_BINARY_DIR}/FXdiv")
  endif()
endif()
| |
# Core components that are built in every configuration.
add_subdirectory(core)
add_subdirectory(serialize)
add_subdirectory(utils)
# perfkernels are needed for full Caffe2 builds, or whenever FBGEMM is not
# available to provide the optimized kernels.
if(BUILD_CAFFE2 OR (NOT USE_FBGEMM))
  add_subdirectory(perfkernels)
endif()

# Skip modules that are not used by libtorch mobile yet.
if(BUILD_CAFFE2 AND NOT INTERN_BUILD_MOBILE)
  add_subdirectory(contrib)
  add_subdirectory(predictor)
  add_subdirectory(predictor/emulator)
  add_subdirectory(core/nomnigraph)
  if(USE_NVRTC)
    add_subdirectory(cuda_rtc)
  endif()
  add_subdirectory(db)
  add_subdirectory(distributed)
  # add_subdirectory(experiments) # note, we may remove this folder at some point
  add_subdirectory(ideep)
  add_subdirectory(image)
  add_subdirectory(video)
  add_subdirectory(mobile)
  add_subdirectory(mpi)
  add_subdirectory(observers)
  add_subdirectory(onnx)
  if(BUILD_CAFFE2_OPS)
    add_subdirectory(operators)
    add_subdirectory(operators/rnn)
    if(USE_FBGEMM)
      add_subdirectory(quantization/server)
    endif()
    if(USE_QNNPACK)
      add_subdirectory(operators/quantized)
    endif()
  endif()
  add_subdirectory(opt)
  add_subdirectory(proto)
  add_subdirectory(python)
  add_subdirectory(queue)
  add_subdirectory(sgd)
  add_subdirectory(share)
  # add_subdirectory(test) # todo: use caffe2_gtest_main instead of gtest_main because we will need to call GlobalInit
  add_subdirectory(transforms)
endif()
# The proto definitions are still needed for libtorch-only (non-Caffe2) builds.
if(NOT BUILD_CAFFE2 AND NOT INTERN_BUILD_MOBILE)
  add_subdirectory(proto)
endif()
| |
# Advanced: if we have allow list specified, we will do intersections for all
# main lib srcs.
if(CAFFE2_ALLOWLISTED_FILES)
  # Apply the allowlist filter to each main source list in turn.
  foreach(allowlist_target_list
      Caffe2_CPU_SRCS
      Caffe2_GPU_SRCS
      Caffe2_GPU_SRCS_W_SORT_BY_KEY
      Caffe2_GPU_CU_SRCS
      Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY
      Caffe2_HIP_SRCS)
    caffe2_do_allowlist(${allowlist_target_list} CAFFE2_ALLOWLISTED_FILES)
  endforeach()
endif()
| |
# Dump every collected source/include list when PRINT_CMAKE_DEBUG_INFO=ON.
# Purely diagnostic: nothing in this block mutates any list.
if(PRINT_CMAKE_DEBUG_INFO)
  message(STATUS "CPU sources: ")
  foreach(tmp ${Caffe2_CPU_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "GPU sources: (for torch_cuda_cpp)")
  foreach(tmp ${Caffe2_GPU_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "GPU sources: (for torch_cuda_cu)")
  foreach(tmp ${Caffe2_GPU_CU_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "torch_cuda_cu GPU sources (w/ sort by key): ")
  foreach(tmp ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "torch_cuda_cpp GPU sources (w/ sort by key): ")
  foreach(tmp ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "CPU include: ")
  foreach(tmp ${Caffe2_CPU_INCLUDE})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "GPU include: ")
  foreach(tmp ${Caffe2_GPU_INCLUDE})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "CPU test sources: ")
  foreach(tmp ${Caffe2_CPU_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "GPU test sources: ")
  foreach(tmp ${Caffe2_GPU_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "HIP sources: ")
  foreach(tmp ${Caffe2_HIP_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "MPS sources: ")
  foreach(tmp ${Caffe2_MPS_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "HIP test sources: ")
  foreach(tmp ${Caffe2_HIP_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "ATen CPU test sources: ")
  foreach(tmp ${ATen_CPU_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "ATen MPS test sources: ")
  foreach(tmp ${ATen_MPS_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "ATen CUDA test sources: ")
  foreach(tmp ${ATen_CUDA_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "ATen HIP test sources: ")
  foreach(tmp ${ATen_HIP_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

  message(STATUS "ATen Vulkan test sources: ")
  foreach(tmp ${ATen_VULKAN_TEST_SRCS})
    message(STATUS " " ${tmp})
  endforeach()

endif()
| |
if(NOT INTERN_BUILD_MOBILE)
  # ---[ List of libraries to link with
  # caffe2_protos bundles the Caffe2_PROTO object files into a static archive;
  # the add_dependencies edge ensures the proto target is built first.
  add_library(caffe2_protos STATIC $<TARGET_OBJECTS:Caffe2_PROTO>)
  add_dependencies(caffe2_protos Caffe2_PROTO)
  # If we are going to link protobuf locally inside caffe2 libraries, what we will do is
  # to create a helper static library that always contains libprotobuf source files, and
  # link the caffe2 related dependent libraries to it.
  target_include_directories(caffe2_protos INTERFACE $<INSTALL_INTERFACE:include>)
  # Reason for this public dependency is as follows:
  # (1) Strictly speaking, we should not expose any Protobuf related functions. We should
  #     only use function interfaces wrapped with our own public API, and link protobuf
  #     locally.
  # (2) However, currently across the Caffe2 codebase, we have extensive use of protobuf
  #     functionalities. For example, not only libcaffe2.so uses it, but also other
  #     binaries such as python extensions etc. As a result, we will have to have a
  #     transitive dependency to libprotobuf.
  #
  # Good thing is that, if we specify CAFFE2_LINK_LOCAL_PROTOBUF, then we do not need to
  # separately deploy protobuf binaries - libcaffe2.so will contain all functionalities
  # one needs. One can verify this via ldd.
  #
  # TODO item in the future includes:
  # (1) Enable using lite protobuf
  # (2) Properly define public API that do not directly depend on protobuf itself.
  # (3) Expose the libprotobuf.a file for dependent libraries to link to.
  #
  # What it means for users/developers?
  # (1) Users: nothing affecting the users, other than the fact that CAFFE2_LINK_LOCAL_PROTOBUF
  #     avoids the need to deploy protobuf.
  # (2) Developers: if one simply uses core caffe2 functionality without using protobuf,
  #     nothing changes. If one has a dependent library that uses protobuf, then one needs to
  #     have the right protobuf version as well as linking to libprotobuf.a.
  target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf)
  # Static builds must install the archive so downstream consumers can link it.
  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS caffe2_protos ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
  endif()
endif()
| |
| # ========================================================== |
| # formerly-libtorch |
| # ========================================================== |
| |
| set(TORCH_SRC_DIR "${PROJECT_SOURCE_DIR}/torch") |
| set(TORCH_ROOT "${PROJECT_SOURCE_DIR}") |
| |
| if(NOT TORCH_INSTALL_BIN_DIR) |
| set(TORCH_INSTALL_BIN_DIR bin) |
| endif() |
| |
| if(NOT TORCH_INSTALL_INCLUDE_DIR) |
| set(TORCH_INSTALL_INCLUDE_DIR include) |
| endif() |
| |
| if(NOT TORCH_INSTALL_LIB_DIR) |
| set(TORCH_INSTALL_LIB_DIR lib) |
| endif() |
| |
| set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) |
| |
| # Generate files |
| set(TOOLS_PATH "${TORCH_ROOT}/tools") |
| |
| configure_file("${TORCH_SRC_DIR}/_utils_internal.py" |
| "${TOOLS_PATH}/shared/_utils_internal.py" |
| COPYONLY) |
| |
| # Generate header with version info |
| configure_file("${TORCH_SRC_DIR}/csrc/api/include/torch/version.h.in" |
| "${TORCH_SRC_DIR}/csrc/api/include/torch/version.h" |
| @ONLY) |
| |
# C++ sources emitted by the autograd/codegen step. Functions.cpp is always
# generated; the sharded files below only exist with full autograd enabled.
set(GENERATED_CXX_TORCH
  "${TORCH_SRC_DIR}/csrc/autograd/generated/Functions.cpp"
)

if(NOT INTERN_DISABLE_AUTOGRAD AND NOT BUILD_LITE_INTERPRETER)
  # The codegen shards these translation units 0..4 to speed up compilation.
  list(APPEND GENERATED_CXX_TORCH
    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_0.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_1.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_2.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_3.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType_4.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_0.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_1.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_2.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_3.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/TraceType_4.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/ADInplaceOrViewType_0.cpp"
    "${TORCH_SRC_DIR}/csrc/autograd/generated/ADInplaceOrViewType_1.cpp"
  )
  # Generated lazy-tensor TorchScript backend registration.
  if(BUILD_LAZY_TS_BACKEND)
    list(APPEND GENERATED_CXX_TORCH
      "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNativeFunctions.cpp"
      "${TORCH_SRC_DIR}/csrc/lazy/generated/RegisterAutogradLazy.cpp"
      "${TORCH_SRC_DIR}/csrc/lazy/generated/RegisterLazy.cpp"
    )
  endif()
endif()
| |
# Headers emitted by the same codegen step.
set(GENERATED_H_TORCH
  "${TORCH_SRC_DIR}/csrc/autograd/generated/Functions.h"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/variable_factories.h"
)

# NOTE(review): the lazy headers are gated only on INTERN_DISABLE_AUTOGRAD,
# while the lazy .cpp files above are additionally gated on
# BUILD_LAZY_TS_BACKEND -- presumably intentional; confirm if modifying.
if(NOT INTERN_DISABLE_AUTOGRAD)
  list(APPEND GENERATED_H_TORCH
    "${TORCH_SRC_DIR}/csrc/autograd/generated/VariableType.h"
    "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyIr.h"
    "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNonNativeIr.h"
    "${TORCH_SRC_DIR}/csrc/lazy/generated/LazyNativeFunctions.h"
  )
endif()

# Generated Python-binding sources (consumed by the python extension build).
set(GENERATED_CXX_PYTHON
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_0.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_1.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_2.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_3.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions_4.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_variable_methods.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_0.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_1.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_torch_functions_2.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nn_functions.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_fft_functions.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_linalg_functions.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_nested_functions.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_sparse_functions.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_special_functions.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_return_types.cpp"
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_enum_tag.cpp"
)

set(GENERATED_H_PYTHON
  "${TORCH_SRC_DIR}/csrc/autograd/generated/python_functions.h"
)

set(GENERATED_TESTING_PYTHON
  "${TORCH_SRC_DIR}/testing/_internal/generated/annotated_fn_args.py"
)

# Aggregate of every file generate_code.py produces; this is the OUTPUT list
# of the code-generation custom command and the dependency set of the
# generate-torch-sources target.
set(TORCH_GENERATED_CODE
  ${GENERATED_CXX_TORCH}
  ${GENERATED_H_TORCH}
  ${GENERATED_CXX_PYTHON}
  ${GENERATED_H_PYTHON}
  ${GENERATED_TESTING_PYTHON}
)
| |
# Optional flag forwarded to generate_code.py when per-operator headers are
# requested.
set(GEN_PER_OPERATOR_FLAG)
if(USE_PER_OPERATOR_HEADERS)
  list(APPEND GEN_PER_OPERATOR_FLAG "--per_operator_headers")
endif()

# Re-run code generation whenever any autograd tool, derivative yaml, or
# template changes.
file(GLOB_RECURSE autograd_python "${TOOLS_PATH}/autograd/*.py")
file(GLOB_RECURSE autograd_yaml "${TOOLS_PATH}/autograd/*.yaml")
file(GLOB_RECURSE autograd_templates "${TOOLS_PATH}/autograd/templates/*")
# NOTE(review): torchgen_python (used in DEPENDS below) is only globbed when
# INTERN_BUILD_ATEN_OPS is on -- confirm that holds for all configurations.
# NOTE(review): this command has no VERBATIM; escaping of the $<...>
# generator-expression arguments is platform-dependent -- verify carefully
# before adding it.
add_custom_command(
  OUTPUT
  ${TORCH_GENERATED_CODE}
  COMMAND
  "${PYTHON_EXECUTABLE}" tools/setup_helpers/generate_code.py
  --native-functions-path "aten/src/ATen/native/native_functions.yaml"
  --tags-path "aten/src/ATen/native/tags.yaml"
  $<$<BOOL:${INTERN_DISABLE_AUTOGRAD}>:--disable-autograd>
  $<$<BOOL:${SELECTED_OP_LIST}>:--selected-op-list-path="${SELECTED_OP_LIST}">
  --force_schema_registration
  --gen_lazy_ts_backend
  ${GEN_PER_OPERATOR_FLAG}
  DEPENDS
  "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
  "${TORCH_ROOT}/aten/src/ATen/native/tags.yaml"
  "${TORCH_ROOT}/aten/src/ATen/native/ts_native_functions.yaml"
  "${TORCH_ROOT}/torch/csrc/lazy/core/shape_inference.h"
  "${TORCH_ROOT}/torch/csrc/lazy/ts_backend/ts_native_functions.cpp"
  "${TORCH_ROOT}/aten/src/ATen/templates/DispatchKeyNativeFunctions.h"
  "${TORCH_ROOT}/aten/src/ATen/templates/DispatchKeyNativeFunctions.cpp"
  "${TORCH_ROOT}/aten/src/ATen/templates/LazyIr.h"
  "${TORCH_ROOT}/aten/src/ATen/templates/LazyNonNativeIr.h"
  "${TORCH_ROOT}/aten/src/ATen/templates/RegisterDispatchKey.cpp"
  ${autograd_python}
  ${autograd_yaml}
  ${autograd_templates}
  ${torchgen_python}
  WORKING_DIRECTORY "${TORCH_ROOT}")
| |
| |
# Required workaround for libtorch_python.so build
# see https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
# (custom-command outputs are only visible inside the directory that declared
# them; this named target re-exports the dependency to other directories)
add_custom_target(
  generate-torch-sources
  DEPENDS ${TORCH_GENERATED_CODE}
)

set(TORCH_SRCS ${GENERATED_CXX_TORCH})
list(APPEND TORCH_SRCS ${GENERATED_H_TORCH})
# Appending "" ensures the variable exists even if nothing else appends to it.
list(APPEND LIBTORCH_CMAKE_SRCS "")

# Note: "SYMOBLICATION" is a long-standing misspelling; the name is referenced
# again further down this file, so it must stay as-is.
list(APPEND LITE_EAGER_SYMOBLICATION_SRCS "")
if(USE_SOURCE_DEBUG_ON_MOBILE)
  append_filelist("libtorch_lite_eager_symbolication" LITE_EAGER_SYMOBLICATION_SRCS)
  # For source debug on lite interpreter, we have to add dependency on pickling
  # but references to read/writeArchiveAndTensor is not built for mobile
  # so this condition specifically says we are building for source debug
  # on mobile.
  if(BUILD_LITE_INTERPRETER)
    set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/serialization/pickle.cpp PROPERTIES COMPILE_FLAGS "-DC10_MOBILE -DFEATURE_TORCH_MOBILE")
  endif()
endif()
| |
# Edge/lite-interpreter profiler sources (only with the profiler enabled).
list(APPEND LITE_PROFILER_SRCS "")
if(USE_LITE_INTERPRETER_PROFILER)
  # Fix: removed the stray trailing space inside the filelist name so it is
  # consistent with every other append_filelist() call in this file.
  append_filelist("libtorch_edge_profiler_sources" LITE_PROFILER_SRCS)
endif()
| |
# Switch between the full jit interpreter and lite interpreter
if(BUILD_LITE_INTERPRETER)
  append_filelist("libtorch_lite_cmake_sources" LIBTORCH_CMAKE_SRCS)
  list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
  list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_PROFILER_SRCS})
  set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
else()
  append_filelist("libtorch_cmake_sources" LIBTORCH_CMAKE_SRCS)
  # Lazy-tensor TorchScript backend is only part of the full interpreter.
  if(BUILD_LAZY_TS_BACKEND)
    append_filelist("lazy_tensor_ts_sources" LIBTORCH_CMAKE_SRCS)
  endif()
  # Per-file warning suppressions for GCC/Clang (MATCHES "Clang" also covers
  # AppleClang).
  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    # TODO: Delete this line once https://github.com/pytorch/pytorch/pull/55889 lands
    set_source_files_properties(../torch/csrc/jit/serialization/export.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)

    # TODO: Delete this when https://github.com/pytorch/pytorch/issues/35026 is fixed
    set_source_files_properties(../torch/csrc/autograd/record_function_ops.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
  endif()
endif()
list(APPEND TORCH_SRCS ${LIBTORCH_CMAKE_SRCS})

# Diagnostic dump of whichever interpreter source list was selected above.
if(PRINT_CMAKE_DEBUG_INFO)
  message(STATUS "Interpreter sources: ")
  foreach(tmp ${LIBTORCH_CMAKE_SRCS})
    message(STATUS " " ${tmp})
  endforeach()
endif()
| |
# Mobile backend delegate srcs
if(INTERN_BUILD_MOBILE)
  set(DELEGATE_SRCS
    ${TORCH_SRC_DIR}/csrc/jit/backends/backend_debug_info.cpp
    ${TORCH_SRC_DIR}/csrc/jit/backends/backend_interface.cpp
  )
  list(APPEND TORCH_SRCS ${DELEGATE_SRCS})
  # Core ML delegate: Objective-C++ sources, iOS-only.
  if(IOS AND USE_COREML_DELEGATE)
    set(COREML_DELEGATE_SRCS
      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/context.cpp
      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLBackend.mm
      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLCompiler.mm
      ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLFeatureProvider.mm
    )
    # ARC disabled for this file; presumably it manages Objective-C object
    # lifetimes manually -- confirm in PTMCoreMLBackend.mm before changing.
    set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/backends/coreml/objc/PTMCoreMLBackend.mm PROPERTIES COMPILE_FLAGS "-fno-objc-arc")
    # NOTE(review): directory-scoped include_directories leaks to every target
    # declared after this point; a target-scoped include would be preferable.
    include_directories(${TORCH_ROOT}/third_party/nlohmann/single_include)
    list(APPEND TORCH_SRCS ${COREML_DELEGATE_SRCS})
  endif()
endif()
| |
# Required workaround for LLVM 9 includes.
if(NOT MSVC)
  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS -Wno-noexcept-type)
  # Force -Werror on several files
  set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/mkldnn/Pooling.cpp PROPERTIES COMPILE_FLAGS "-Werror")
endif()
# Disable certain warnings for GCC-9.X
# (the condition actually matches any GCC strictly newer than 9.0.0)
if(CMAKE_COMPILER_IS_GNUCXX AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0.0))
  # See https://github.com/pytorch/pytorch/issues/38856
  # Note: set_source_files_properties REPLACES COMPILE_FLAGS, so this line
  # overwrites the llvm_jit.cpp flags set above -- it deliberately re-includes
  # -Wno-noexcept-type for that reason.
  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS "-Wno-redundant-move -Wno-noexcept-type")
  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_codegen.cpp PROPERTIES COMPILE_FLAGS "-Wno-init-list-lifetime")
endif()
| |
# Mobile/lite interpreter runtime: bytecode loading, interpretation, and
# on-device training helpers. Included unless explicitly disabled.
if(NOT INTERN_DISABLE_MOBILE_INTERP)
  set(MOBILE_SRCS
    ${TORCH_SRC_DIR}/csrc/jit/mobile/function.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/import.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/import_data.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/interpreter.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/model_compatibility.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/module.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/flatbuffer_loader.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/observer.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/parse_bytecode.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/parse_operators.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/quantization.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/train/export_data.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/train/optim/sgd.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/train/random.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/train/sequential.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/upgrader_mobile.cpp
  )
  list(APPEND TORCH_SRCS ${MOBILE_SRCS})
  list(APPEND TORCH_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
endif()

# This one needs to be unconditionally added as Functions.cpp is also unconditionally added
list(APPEND TORCH_SRCS
  ${TORCH_SRC_DIR}/csrc/autograd/FunctionsManual.cpp
  ${TORCH_SRC_DIR}/csrc/utils/out_types.cpp
)

# Hand-written companions to the generated TraceType/VariableType shards.
if(NOT INTERN_DISABLE_AUTOGRAD AND NOT BUILD_LITE_INTERPRETER)
  list(APPEND TORCH_SRCS
    ${TORCH_SRC_DIR}/csrc/autograd/TraceTypeManual.cpp
    ${TORCH_SRC_DIR}/csrc/autograd/VariableTypeManual.cpp
  )
endif()
| |
# ITT (Intel instrumentation/tracing) profiler stub sources.
# Fix: use if(USE_ITT) rather than if(${USE_ITT}) -- the dereferenced form
# expands to the invalid if() when USE_ITT is unset or empty (a configure
# error), and every other feature guard in this file uses the plain form.
if(USE_ITT)
  list(APPEND TORCH_SRCS
    ${TORCH_SRC_DIR}/csrc/itt_wrapper.cpp
    ${TORCH_SRC_DIR}/csrc/profiler/stubs/itt.cpp
  )
endif()
| |
# Full-JIT-only sources: model export/serialization, ONNX, the CPU fused
# kernel compiler, and distributed support. Excluded on mobile and lite
# interpreter builds.
if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER)
  list(APPEND TORCH_SRCS
    ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/backport.cpp
    ${TORCH_SRC_DIR}/csrc/jit/mobile/compatibility/backport_manager.cpp
    ${TORCH_SRC_DIR}/csrc/jit/serialization/onnx.cpp
    ${TORCH_SRC_DIR}/csrc/jit/serialization/export.cpp
    ${TORCH_SRC_DIR}/csrc/jit/serialization/export_bytecode.cpp
    ${TORCH_SRC_DIR}/csrc/jit/serialization/export_module.cpp
    ${TORCH_SRC_DIR}/csrc/jit/serialization/flatbuffer_serializer.cpp
    ${TORCH_SRC_DIR}/csrc/jit/serialization/flatbuffer_serializer_jit.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
    ${TORCH_SRC_DIR}/csrc/jit/api/module_save.cpp
    ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp
  )

  # Disable legacy import of building without Caffe2 support
  if(BUILD_CAFFE2)
    list(APPEND TORCH_SRCS
      ${TORCH_SRC_DIR}/csrc/jit/serialization/import_legacy.cpp
    )
  else()
    # Compile import.cpp with legacy import stubbed out instead.
    set_source_files_properties(
      ${TORCH_SRC_DIR}/csrc/jit/serialization/import.cpp
      PROPERTIES COMPILE_FLAGS "-DC10_DISABLE_LEGACY_IMPORT"
    )
  endif()
  # CPU-side distributed sources; the "extra" set is POSIX-only.
  if(USE_DISTRIBUTED)
    append_filelist("libtorch_distributed_base_sources" TORCH_SRCS)
    if(NOT WIN32)
      append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS)
    endif()
  endif()
endif()
| |
# JIT fuser sources shared between the CUDA and ROCm builds; routed into the
# appropriate per-backend list below.
if(USE_CUDA OR USE_ROCM)
  append_filelist("libtorch_cuda_core_sources" Caffe2_GPU_HIP_JIT_FUSERS_SRCS)
endif()

if(USE_CUDA)
  list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
  # caffe2_nvrtc: small shared library of NVRTC / CUDA driver API stubs.
  add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
  if(MSVC)
    # Delay load nvcuda.dll so we can import torch compiled with cuda on a CPU-only machine
    set(DELAY_LOAD_FLAGS "-DELAYLOAD:nvcuda.dll;delayimp.lib")
  else()
    set(DELAY_LOAD_FLAGS "")
  endif()

  target_link_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} ${DELAY_LOAD_FLAGS})
  target_include_directories(caffe2_nvrtc PRIVATE ${CUDA_INCLUDE_DIRS})
  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
  if(USE_NCCL)
    list(APPEND Caffe2_GPU_SRCS
      ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
  endif()
  if(USE_DISTRIBUTED)
    append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS)
    if(NOT WIN32)
      append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS)
    endif()
  endif()
  # Bake the NVRTC "short hash" into LazyNVRTC.cpp as a compile definition;
  # presumably used to resolve the versioned NVRTC library name at runtime --
  # confirm in LazyNVRTC.cpp.
  set_source_files_properties(
    ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp
    PROPERTIES COMPILE_DEFINITIONS "NVRTC_SHORTHASH=${CUDA_NVRTC_SHORTHASH}"
  )
  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/passes/frozen_conv_add_relu_fusion.cpp PROPERTIES COMPILE_FLAGS "-DUSE_CUDA=1")
endif()

# oneDNN Graph (LLGA) JIT fusion sources, CPU-side.
if(BUILD_ONEDNN_GRAPH)
  list(APPEND Caffe2_CPU_SRCS
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/LlgaTensorImpl.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_fuser.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_rewriter.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/graph_helper.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/register_interface.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/decompose_silu.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/interface.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/kernel.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/defer_size_check.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/layout_propagation.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/prepare_binary.cpp
    ${TORCH_SRC_DIR}/csrc/jit/codegen/onednn/guard_shape.cpp
  )
endif()
| |
# ROCm mirror of the CUDA section above. Note it also defines caffe2_nvrtc;
# presumably USE_CUDA and USE_ROCM are mutually exclusive, otherwise the
# target would be declared twice -- confirm at the top-level configuration.
if(USE_ROCM)
  list(APPEND Caffe2_HIP_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
  if(USE_NCCL)
    list(APPEND Caffe2_HIP_SRCS
      ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp)
  endif()
  if(USE_DISTRIBUTED)
    append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS)
    if(NOT WIN32)
      append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS)
    endif()
  endif()
  # caffe2_nvrtc's stubs to driver APIs are useful for HIP.
  # See NOTE [ ATen NVRTC Stub and HIP ]
  add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
  target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_HCC_LIBRARIES} ${ROCM_HIPRTC_LIB})
  target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_HCC__)
  install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
endif()
| |
# C++ frontend (torch::nn / torch::optim / torch::data / torch::serialize)
# sources; skipped when the C++ API is disabled or for the lite interpreter.
if(NOT NO_API AND NOT BUILD_LITE_INTERPRETER)
  list(APPEND TORCH_SRCS
    ${TORCH_SRC_DIR}/csrc/api/src/cuda.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/data/datasets/mnist.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/distributed.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/random.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/sequential.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/data/samplers/stream.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/enum.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/imethod.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/serialize.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/init.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/module.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/_functions.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/activation.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/adaptive.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/batchnorm.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/normalization.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/instancenorm.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/conv.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/dropout.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/distance.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/embedding.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/fold.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/linear.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/loss.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/padding.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/pixelshuffle.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/pooling.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/rnn.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/upsampling.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/transformer.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/modules/container/functional.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/activation.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/adaptive.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/batchnorm.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/embedding.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/instancenorm.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/normalization.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/conv.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/dropout.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/linear.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/padding.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/pooling.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/rnn.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/vision.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/nn/options/transformer.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/adagrad.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/adam.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/adamw.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/lbfgs.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/optimizer.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/rmsprop.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/serialize.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/sgd.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/lr_scheduler.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/step_lr.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/serialize/input-archive.cpp
    ${TORCH_SRC_DIR}/csrc/api/src/serialize/output-archive.cpp
  )
endif()

# Everything collected into TORCH_SRCS becomes part of the CPU source list
# (consumed by add_library(torch_cpu ...) further down).
list(APPEND Caffe2_CPU_SRCS ${TORCH_SRCS})

# MPS sources are compiled into the CPU library rather than a separate one.
if(USE_MPS)
  list(APPEND Caffe2_CPU_SRCS ${Caffe2_MPS_SRCS})
endif()
| |
# NOTE [ Linking AVX and non-AVX files ]
#
# Regardless of the CPU capabilities, we build some files with AVX2, and AVX512
# instruction set. If the host CPU doesn't support those, we simply ignore their
# functions at runtime during dispatch.
#
# We must make sure that those files are at the end of the input list when
# linking the torch_cpu library. Otherwise, the following error scenario might
# occur:
# 1. A non-AVX2 and an AVX2 file both call a function defined with the `inline`
#    keyword
# 2. The compiler decides not to inline this function
# 3. Two different versions of the machine code are generated for this function:
#    one without AVX2 instructions and one with AVX2.
# 4. When linking, the AVX2 version is found earlier in the input object files,
#    so the linker makes the entire library use it, even in code not guarded by
#    the dispatcher.
# 5. A CPU without AVX2 support executes this function, encounters an AVX2
#    instruction and crashes.
#
# Thus we organize the input files in the following order:
# 1. All files with no AVX-n support
# 2. All files with AVX2 support ('*AVX2.cpp')
# 3. All files with AVX512 support ('*AVX512.cpp')
set(Caffe2_CPU_SRCS_NON_AVX)
set(Caffe2_CPU_SRCS_AVX2)
set(Caffe2_CPU_SRCS_AVX512)
# Bucket each source by its instruction-set suffix. A filename matches at
# most one of the two AVX patterns, so the branch order is immaterial.
foreach(cpu_src IN LISTS Caffe2_CPU_SRCS)
  if(cpu_src MATCHES "AVX512\\.cpp")
    list(APPEND Caffe2_CPU_SRCS_AVX512 ${cpu_src})
  elseif(cpu_src MATCHES "AVX2\\.cpp")
    list(APPEND Caffe2_CPU_SRCS_AVX2 ${cpu_src})
  else()
    list(APPEND Caffe2_CPU_SRCS_NON_AVX ${cpu_src})
  endif()
endforeach()
# Reassemble in dispatch-safe link order: plain, then AVX2, then AVX512.
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS_NON_AVX} ${Caffe2_CPU_SRCS_AVX2} ${Caffe2_CPU_SRCS_AVX512})
| |
| # ========================================================== |
| # END formerly-libtorch sources |
| # ========================================================== |
| |
| add_library(torch_cpu ${Caffe2_CPU_SRCS}) |
| if(HAVE_SOVERSION) |
| set_target_properties(torch_cpu PROPERTIES |
| VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) |
| endif() |
| torch_compile_options(torch_cpu) # see cmake/public/utils.cmake |
| if(HAS_WERROR_SIGN_COMPARE AND WERROR) |
| # target_compile_options(torch_cpu PRIVATE "-Werror=sign-compare") |
| set_property(SOURCE ${ATen_CORE_SRCS} ${ATen_CPU_SRCS} APPEND PROPERTY COMPILE_OPTIONS "-Werror=sign-compare") |
| endif() |
| |
| set_property(SOURCE ${ATen_CORE_SRCS} APPEND |
| PROPERTY COMPILE_DEFINITIONS "TORCH_ASSERT_ONLY_METHOD_OPERATORS") |
| |
| if(USE_PRECOMPILED_HEADERS) |
| target_precompile_headers(torch_cpu PRIVATE |
| "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>") |
| # Exclude some files from using PCH |
| set_source_files_properties( |
| # Not built with OpenMP, so PCH is invalid |
| ${Torch_SOURCE_DIR}/aten/src/ATen/MapAllocator.cpp |
| # Builds with incompatible compiler flags |
| ${Caffe2_CPU_SRCS_AVX2} |
| ${Caffe2_CPU_SRCS_AVX512} |
| PROPERTIES SKIP_PRECOMPILE_HEADERS ON) |
| endif() |
| |
# Pass path to PocketFFT
if(AT_POCKETFFT_ENABLED)
  if(CMAKE_VERSION VERSION_LESS "3.11")
    # Per-source INCLUDE_DIRECTORIES needs CMake >= 3.11; fall back to
    # target-wide on older CMake.
    target_include_directories(torch_cpu PRIVATE "${POCKETFFT_INCLUDE_DIR}")
  else()
    # Scope the PocketFFT headers to the single file that uses them.
    set_source_files_properties(
      "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/mkl/SpectralOps.cpp"
      PROPERTIES INCLUDE_DIRECTORIES "${POCKETFFT_INCLUDE_DIR}")
  endif()
endif()

if(CMAKE_COMPILER_IS_GNUCXX AND BUILD_LIBTORCH_CPU_WITH_DEBUG)
  # To enable debug fission we need to build libtorch_cpu with debug info on,
  # but this increases link time and peak memory usage if we use the
  # REL_WITH_DEB_INFO env var since that enables it for everything, but it's
  # only really necessary for libtorch_cpu.
  target_compile_options(torch_cpu PRIVATE "-g")
endif()
| |
if(USE_LLVM AND LLVM_FOUND)
  # Resolve the abstract LLVM component names to concrete library names for
  # the LLVM installation that was found.
  llvm_map_components_to_libnames(LLVM_LINK_LIBS
    support core analysis executionengine instcombine
    scalaropts transformutils ${LLVM_TARGETS_TO_BUILD} orcjit)
  target_link_libraries(torch_cpu PRIVATE ${LLVM_LINK_LIBS})
  if(APPLE)
    # Keep the statically-linked LLVM symbols out of the exported symbol
    # table via the platform linker's symbol-list mechanism.
    set(LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unexported_symbols.lds")
    set_target_properties(torch_cpu PROPERTIES LINK_DEPENDS "${LINKER_SCRIPT}")
    set_target_properties(torch_cpu PROPERTIES LINK_FLAGS "-Wl,-unexported_symbols_list,${LINKER_SCRIPT}")
  elseif(UNIX)
    # ELF platforms use a version script instead.
    set(LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/version_script.lds")
    set_target_properties(torch_cpu PROPERTIES LINK_DEPENDS "${LINKER_SCRIPT}")
    target_link_libraries(torch_cpu PRIVATE "-Wl,--version-script=${LINKER_SCRIPT}")
  endif()
endif()
| |
# This is required for older versions of CMake, which don't allow
# specifying add_library() without a list of source files
set(DUMMY_EMPTY_FILE ${CMAKE_BINARY_DIR}/empty.cpp)

if(MSVC)
  # MSVC: export at least one symbol so an import library is produced for
  # the wrapper target. The "\\;" stores an *escaped* semicolon so the
  # statement body survives as a single list element.
  set(DUMMY_FILE_CONTENT "__declspec(dllexport) int ignore_this_library_placeholder(){return 0\\;}")
else()
  set(DUMMY_FILE_CONTENT "")
endif()

# NOTE: the expansion is deliberately left unquoted — that is what turns the
# escaped "\;" back into a plain ";" before the content is written. Do not
# "fix" this by quoting it.
file(WRITE ${DUMMY_EMPTY_FILE} ${DUMMY_FILE_CONTENT})
| |
# Wrapper library for people who link against torch and expect both CPU and CUDA support
# Contains "torch_cpu" and "torch_cuda"
add_library(torch ${DUMMY_EMPTY_FILE})
if(BUILD_SPLIT_CUDA)
  # When we split torch_cuda, we want a dummy torch_cuda library that contains both parts
  add_library(torch_cuda ${DUMMY_EMPTY_FILE})
endif()
if(HAVE_SOVERSION)
  set_target_properties(torch PROPERTIES
      VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
endif()

if(USE_ROCM)
  # Mark .cu/.hip sources so hip_add_library treats them as HIP sources.
  filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cu|hip)$")
  set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
endif()
| |
# Compile exposed libraries.
if(USE_ROCM)
  # ROCm/HIP build: one torch_hip target holds all GPU sources.
  set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
  hip_add_library(torch_hip ${Caffe2_HIP_SRCS})
  set(CUDA_LINK_LIBRARIES_KEYWORD)
  torch_compile_options(torch_hip)  # see cmake/public/utils.cmake
  # TODO: Not totally sure if this is live or not
  if(USE_NCCL)
    target_link_libraries(torch_hip PRIVATE __caffe2_nccl)
    target_compile_definitions(torch_hip PRIVATE USE_NCCL)
  endif()

  if(USE_PRECOMPILED_HEADERS)
    target_precompile_headers(torch_hip PRIVATE
        "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
  endif()
elseif(USE_CUDA)
  set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
  if(CUDA_SEPARABLE_COMPILATION)
    # Separate compilation fails when kernels using `thrust::sort_by_key`
    # are linked with the rest of CUDA code. Workaround by linking them separately.
    add_library(torch_cuda ${Caffe2_GPU_SRCS} ${Caffe2_GPU_CU_SRCS})
    set_property(TARGET torch_cuda PROPERTY CUDA_SEPARABLE_COMPILATION ON)

    add_library(torch_cuda_w_sort_by_key OBJECT
        ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
        ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
    set_property(TARGET torch_cuda_w_sort_by_key PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
    target_link_libraries(torch_cuda PRIVATE torch_cuda_w_sort_by_key)
  elseif(BUILD_SPLIT_CUDA)
    # Split build: C++ translation units and .cu translation units become
    # two libraries (torch_cuda_cpp / torch_cuda_cu).
    add_library(torch_cuda_cpp ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY})
    add_library(torch_cuda_cu ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
  else()
    # Default: a single torch_cuda with everything.
    add_library(torch_cuda
        ${Caffe2_GPU_SRCS} ${Caffe2_GPU_SRCS_W_SORT_BY_KEY}
        ${Caffe2_GPU_CU_SRCS} ${Caffe2_GPU_CU_SRCS_W_SORT_BY_KEY})
  endif()
  set(CUDA_LINK_LIBRARIES_KEYWORD)
  if(BUILD_SPLIT_CUDA)
    torch_compile_options(torch_cuda_cpp)  # see cmake/public/utils.cmake
    torch_compile_options(torch_cuda_cu)  # see cmake/public/utils.cmake
    target_compile_definitions(torch_cuda_cpp PRIVATE BUILD_SPLIT_CUDA)
    target_compile_definitions(torch_cuda_cpp PRIVATE USE_CUDA)
    target_compile_definitions(torch_cuda_cu PRIVATE BUILD_SPLIT_CUDA)
    target_compile_definitions(torch_cuda_cu PRIVATE USE_CUDA)
  else()
    torch_compile_options(torch_cuda)  # see cmake/public/utils.cmake
    target_compile_definitions(torch_cuda PRIVATE USE_CUDA)
  endif()
  if(USE_NCCL AND BUILD_SPLIT_CUDA)
    target_link_libraries(torch_cuda_cpp PRIVATE __caffe2_nccl)
    target_compile_definitions(torch_cuda_cpp PRIVATE USE_NCCL)
  elseif(USE_NCCL)
    target_link_libraries(torch_cuda PRIVATE __caffe2_nccl)
    target_compile_definitions(torch_cuda PRIVATE USE_NCCL)
  endif()
  if(USE_UCC AND BUILD_SPLIT_CUDA)
    target_link_libraries(torch_cuda_cpp PRIVATE __caffe2_ucc)
    target_compile_definitions(torch_cuda_cpp PRIVATE USE_UCC)
  elseif(USE_UCC)
    target_link_libraries(torch_cuda PRIVATE __caffe2_ucc)
    target_compile_definitions(torch_cuda PRIVATE USE_UCC)
  endif()
  if(BUILD_LAZY_CUDA_LINALG)
    # Lazily-loaded linear algebra backend (cuSOLVER/MAGMA) in its own
    # shared library.
    add_library(torch_cuda_linalg ${ATen_CUDA_LINALG_SRCS})
    target_compile_definitions(torch_cuda_linalg PRIVATE USE_CUDA BUILD_LAZY_CUDA_LINALG)
    # Library order is important during static linking
    # `torch::magma` should be mentioned before other CUDA
    # to transitively include all symbols present in torch_cuda/torch_cpu
    if(USE_MAGMA)
      target_link_libraries(torch_cuda_linalg PRIVATE torch::magma)
      # CUDAHooks reports version of MAGMA PyTorch was compiled against, i.e. needs to be able to include magma headers
      get_target_property(HOOKS_INCLUDE_DIRECTORIES torch_cuda INCLUDE_DIRECTORIES)
      if(NOT "${MAGMA_INCLUDE_DIR}" IN_LIST HOOKS_INCLUDE_DIRECTORIES)
        set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/cuda/detail/CUDAHooks.cpp PROPERTIES INCLUDE_DIRECTORIES "${MAGMA_INCLUDE_DIR}")
      endif()
    endif()
    target_link_libraries(torch_cuda_linalg PRIVATE
        torch_cpu
        torch_cuda
    )
    # NOTE(review): environment variable read at configure time only; a
    # changed env requires re-running CMake to take effect.
    if($ENV{ATEN_STATIC_CUDA})
      # Hardcoded static archive paths for wheel builds.
      target_link_libraries(torch_cuda_linalg PRIVATE
          ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusolver_static.a
          ${CUDA_TOOLKIT_ROOT_DIR}/lib64/liblapack_static.a # needed for libcusolver_static
      )
    else()
      target_link_libraries(torch_cuda_linalg PRIVATE
          ${CUDA_cusolver_LIBRARY}
      )
    endif()
    # NS: TODO, is this really necessary?
    if(USE_MAGMA AND CAFFE2_STATIC_LINK_CUDA)
      target_link_libraries(torch_cuda_linalg PRIVATE
          "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
    endif()
    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
    install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}")
  endif()

  if(USE_PRECOMPILED_HEADERS)
    if(BUILD_SPLIT_CUDA)
      target_precompile_headers(torch_cuda_cpp PRIVATE
          "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
    else()
      target_precompile_headers(torch_cuda PRIVATE
          "$<$<COMPILE_LANGUAGE:CXX>:ATen/core/ATen_pch.h>")
    endif()
  endif()
endif()
| |
# nvFuser is shared between the CUDA and (hipified) ROCm builds.
if(USE_CUDA OR USE_ROCM)
  include(${TORCH_SRC_DIR}/csrc/jit/codegen/cuda/nvfuser.cmake)
endif()

# fxdiv helper library (comes in with XNNPACK); not linked here on MSVC —
# presumably handled elsewhere on that toolchain, confirm before changing.
if(NOT MSVC AND USE_XNNPACK)
  TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
endif()

# ==========================================================
# formerly-libtorch flags
# ==========================================================

if(NOT INTERN_BUILD_MOBILE)
  # Forces caffe2.pb.h to be generated before its dependents are compiled.
  # Adding the generated header file to the ${TORCH_SRCS} list is not sufficient
  # to establish the dependency, since the generation procedure is declared in a different CMake file.
  # See https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
  add_dependencies(torch_cpu Caffe2_PROTO)
endif()
| |
# Build model tracer for tracing-based selective build
if(TRACING_BASED AND NOT BUILD_LITE_INTERPRETER AND NOT INTERN_BUILD_MOBILE)
  add_subdirectory(
    ${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer
    ${CMAKE_BINARY_DIR}/model_tracer
  )
  # NOTE(review): appends to the global CMAKE_CXX_FLAGS instead of a target
  # property, so the define applies to every target configured after this
  # point in this scope — confirm that the broad scope is intentional.
  string(APPEND CMAKE_CXX_FLAGS " -DENABLE_RECORD_KERNEL_FUNCTION_DTYPE")
endif()
| |
# Codegen selected_mobile_ops.h for template selective build
if(BUILD_LITE_INTERPRETER AND SELECTED_OP_LIST)
  message("running gen_selected_mobile_ops_header for: '${SELECTED_OP_LIST}'")
  file(GLOB lite_interpreter_python "${TOOLS_PATH}/lite_interpreter/*.py")
  # Plain if(TRACING_BASED): the previous if(${TRACING_BASED}) re-evaluated
  # the variable's *value* as another variable name and errors out when
  # TRACING_BASED is unset.
  if(TRACING_BASED)
    # Tracing-based build: derive the op list from traced models.
    file(GLOB code_analyzer_python "${TOOLS_PATH}/code_analyzer/*.py")
    add_custom_command(
      OUTPUT ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h
      COMMAND
      "${PYTHON_EXECUTABLE}"
      -m tools.code_analyzer.gen_oplist
      --model_file_list_path "${SELECTED_OP_LIST}"
      --output_dir "${CMAKE_BINARY_DIR}/aten/src/ATen"
      DEPENDS
      ${torchgen_python}
      ${lite_interpreter_python}
      ${code_analyzer_python}
      "${SELECTED_OP_LIST}"
      "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
      WORKING_DIRECTORY "${TORCH_ROOT}"
      VERBATIM)  # VERBATIM: portable, generator-independent argument escaping
  else()
    # Static op list: generate the header straight from the YAML list.
    add_custom_command(
      OUTPUT ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h
      COMMAND
      "${PYTHON_EXECUTABLE}"
      -m tools.lite_interpreter.gen_selected_mobile_ops_header
      --yaml_file_path "${SELECTED_OP_LIST}"
      --output_file_path "${CMAKE_BINARY_DIR}/aten/src/ATen"
      DEPENDS
      ${torchgen_python}
      ${lite_interpreter_python}
      "${SELECTED_OP_LIST}"
      "${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
      WORKING_DIRECTORY "${TORCH_ROOT}"
      VERBATIM)
  endif()

  # Named target so torch_cpu can depend on the generated header; the
  # custom command alone is not a target-level dependency.
  add_custom_target(
      __selected_mobile_ops_header_gen
      DEPENDS ${CMAKE_BINARY_DIR}/aten/src/ATen/selected_mobile_ops.h)
  add_dependencies(torch_cpu __selected_mobile_ops_header_gen)
endif()
| |
# Private include dirs for the C++ frontend API; the PUBLIC
# BUILD_INTERFACE variant for consumers is added later in this file.
if(NOT NO_API)
  target_include_directories(torch_cpu PRIVATE
    ${TORCH_SRC_DIR}/csrc/api
    ${TORCH_SRC_DIR}/csrc/api/include)
endif()

if(BUILD_SPLIT_CUDA AND MSVC)
  # -INCLUDE is used to ensure torch_cuda_cpp/cu are linked against in a project that relies on them.
  target_link_libraries(torch_cuda_cpp INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ")
  # See [Note about _torch_cuda_cu_linker_symbol_op and torch_cuda_cu] in native_functions.yaml
  target_link_libraries(torch_cuda_cu INTERFACE "-INCLUDE:?_torch_cuda_cu_linker_symbol_op_cuda@native@at@@YA?AVTensor@2@AEBV32@@Z")
elseif(USE_CUDA AND MSVC)
  # -INCLUDE is used to ensure torch_cuda is linked against in a project that relies on them.
  # Related issue: https://github.com/pytorch/pytorch/issues/31611
  target_link_libraries(torch_cuda INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ")
endif()
| |
# Legacy TH include dirs (both source and generated-binary trees).
if(NOT BUILD_LITE_INTERPRETER)
  set(TH_CPU_INCLUDE
      # dense
      aten/src/TH
      ${CMAKE_CURRENT_BINARY_DIR}/aten/src/TH
      ${TORCH_ROOT}/aten/src
      ${CMAKE_CURRENT_BINARY_DIR}/aten/src

      ${CMAKE_BINARY_DIR}/aten/src)
  target_include_directories(torch_cpu PRIVATE ${TH_CPU_INCLUDE})
endif()

# ATen include roots. NOTE: this list is appended to below (e.g. TBB)
# before it is finally applied to torch_cpu — keep that ordering.
set(ATen_CPU_INCLUDE
    ${TORCH_ROOT}/aten/src
    ${CMAKE_CURRENT_BINARY_DIR}/../aten/src
    ${CMAKE_BINARY_DIR}/aten/src)
| |
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  # These ATen/native sources call APIs that warn as deprecated on
  # GCC/Clang; suppress the warning for exactly these files.
  foreach(_no_deprecated_src
      QuantizedLinear.cpp
      RNN.cpp
      quantized/cpu/qlinear_prepack.cpp
      quantized/qlinear_unpack.cpp)
    set_source_files_properties(
        ${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/${_no_deprecated_src}
        PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
  endforeach()
endif()
| |
if(USE_TBB)
  # Appends to ATen_CPU_INCLUDE *before* it is applied to torch_cpu below.
  list(APPEND ATen_CPU_INCLUDE ${TBB_INCLUDE_DIR})
  target_link_libraries(torch_cpu PUBLIC TBB::tbb)
endif()

if(BUILD_CAFFE2 AND BUILD_CAFFE2_OPS AND USE_FBGEMM)
  # FIXME: quantization/server/conv_dnnlowp_op.cc depends on fbgemm/src/RefImplementations.h
  target_include_directories(torch_cpu PRIVATE ${CMAKE_CURRENT_LIST_DIR}/../third_party)
endif()
| |
# All private include roots for torch_cpu, listed in the same order as the
# individual calls they replace: ATen roots, torch csrc, bundled miniz, and
# the public kineto headers.
target_include_directories(torch_cpu PRIVATE
    ${ATen_CPU_INCLUDE}
    ${TORCH_SRC_DIR}/csrc
    ${TORCH_ROOT}/third_party/miniz-2.1.0
    ${TORCH_ROOT}/third_party/kineto/libkineto/include)

# The profiler also needs kineto's internal (src) headers when enabled.
if(USE_KINETO)
  target_include_directories(torch_cpu PRIVATE
      ${TORCH_ROOT}/third_party/kineto/libkineto/src)
endif()
| |
# Install all csrc headers under <prefix>/include/torch, preserving layout.
install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
        DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
        FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
# Top-level convenience headers for extension authors.
install(FILES
  "${TORCH_SRC_DIR}/script.h"
  "${TORCH_SRC_DIR}/extension.h"
  "${TORCH_SRC_DIR}/custom_class.h"
  "${TORCH_SRC_DIR}/library.h"
  "${TORCH_SRC_DIR}/custom_class_detail.h"
  DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch)
# C++ test suites. Lite-interpreter builds get their own reduced set;
# everything else builds the full JIT/tensorexpr/distributed/API tests.
if(BUILD_TEST)
  if(BUILD_LITE_INTERPRETER)
    add_subdirectory(
      ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime
      ${CMAKE_BINARY_DIR}/test_lite_interpreter_runtime
    )
    add_subdirectory(
      ${TORCH_ROOT}/test/mobile/lightweight_dispatch
      ${CMAKE_BINARY_DIR}/test_codegen_unboxing
    )
  else()
    add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit)
    add_subdirectory(
      ${TORCH_ROOT}/test/cpp/tensorexpr
      ${CMAKE_BINARY_DIR}/test_tensorexpr
    )
    if(USE_DISTRIBUTED)
      add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d)
      # RPC / dist_autograd are not supported on Windows (see USE_RPC below).
      if(NOT WIN32)
        add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd)
        add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc)
      endif()
    endif()
    if(NOT NO_API)
      add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api)
    endif()

    if(USE_LLVM AND LLVM_FOUND)
      add_subdirectory(
        ${TORCH_ROOT}/test/mobile/nnc
        ${CMAKE_BINARY_DIR}/test_mobile_nnc
      )
    endif()
    add_subdirectory(${TORCH_ROOT}/test/cpp/lazy
        ${CMAKE_BINARY_DIR}/test_lazy)
  endif()
endif()
| |
# XXX This ABI check cannot be run with arm-linux-androideabi-g++
# Determines the _GLIBCXX_USE_CXX11_ABI the host compiler defaults to by
# compiling and running a tiny probe program at *configure* time.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  if(DEFINED GLIBCXX_USE_CXX11_ABI)
    message(STATUS "_GLIBCXX_USE_CXX11_ABI is already defined as a cmake variable")
  else()
    message(STATUS "${CMAKE_CXX_COMPILER} ${TORCH_SRC_DIR}/abi-check.cpp -o ${CMAKE_BINARY_DIR}/abi-check")
    execute_process(
        COMMAND
        "${CMAKE_CXX_COMPILER}"
        "${TORCH_SRC_DIR}/abi-check.cpp"
        "-o"
        "${CMAKE_BINARY_DIR}/abi-check"
        RESULT_VARIABLE ABI_CHECK_COMPILE_RESULT)
    if(ABI_CHECK_COMPILE_RESULT)
      message(FATAL_ERROR "Could not compile ABI Check: ${ABI_CHECK_COMPILE_RESULT}")
    endif()
    # A failed *run* only warns, leaving GLIBCXX_USE_CXX11_ABI empty.
    # NOTE(review): the probe's stdout is used verbatim — presumably it
    # prints just "0"/"1" with no trailing newline; confirm against
    # abi-check.cpp before relying on the exact value.
    execute_process(
        COMMAND "${CMAKE_BINARY_DIR}/abi-check"
        RESULT_VARIABLE ABI_CHECK_RESULT
        OUTPUT_VARIABLE GLIBCXX_USE_CXX11_ABI)
    if(ABI_CHECK_RESULT)
      message(WARNING "Could not run ABI Check: ${ABI_CHECK_RESULT}")
    endif()
  endif()
  message(STATUS "Determined _GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
endif()
| |
# CMake config for external projects.
# NOTE(review): one template is addressed via PROJECT_SOURCE_DIR and the
# other via TORCH_ROOT — presumably the same directory here; confirm and
# unify if so.
configure_file(
    ${PROJECT_SOURCE_DIR}/cmake/TorchConfigVersion.cmake.in
    ${PROJECT_BINARY_DIR}/TorchConfigVersion.cmake
    @ONLY)
configure_file(
    ${TORCH_ROOT}/cmake/TorchConfig.cmake.in
    ${PROJECT_BINARY_DIR}/TorchConfig.cmake
    @ONLY)
install(FILES
    ${PROJECT_BINARY_DIR}/TorchConfigVersion.cmake
    ${PROJECT_BINARY_DIR}/TorchConfig.cmake
    DESTINATION share/cmake/Torch)
| |
| |
# ---[ Torch python bindings build
add_subdirectory(../torch torch)
# Re-export the variables the ../torch subdirectory computed, for our parent.
set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE)
set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE)

# ==========================================================
# END formerly-libtorch flags
# ==========================================================

# PUBLIC build-tree API include dirs for consumers of torch_cpu
# (build-interface only; installed consumers get paths from TorchConfig).
if(NOT NO_API)
  target_include_directories(torch_cpu PUBLIC
    $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api>
    $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/api/include>)
endif()
| |
| |
if(USE_OPENMP)
  find_package(OpenMP QUIET)
endif()
if(USE_OPENMP AND OPENMP_FOUND)
  # AT_MKL_MT: whether MKL must use its multi-threaded (iomp5) interface —
  # true only when MSVC links against Intel's libiomp5md.
  if(MSVC AND OpenMP_CXX_LIBRARIES MATCHES "libiomp5md\\.lib")
    set(AT_MKL_MT 1)
  else()
    set(AT_MKL_MT 0)
  endif()
  message(STATUS "pytorch is compiling with OpenMP. \n"
    "OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
    "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")
  # OpenMP_CXX_FLAGS is a single string; split it into a proper argument
  # list with platform-appropriate quoting rules.
  if(UNIX)
    separate_arguments(OpenMP_CXX_OPTIONS UNIX_COMMAND "${OpenMP_CXX_FLAGS}")
  else()
    separate_arguments(OpenMP_CXX_OPTIONS WINDOWS_COMMAND "${OpenMP_CXX_FLAGS}")
  endif()
  target_compile_options(torch_cpu PRIVATE ${OpenMP_CXX_OPTIONS})
  target_link_libraries(torch_cpu PRIVATE ${OpenMP_CXX_LIBRARIES})
endif()
| |
| |
if(USE_ROCM)
  target_compile_definitions(torch_hip PRIVATE
    USE_ROCM
    __HIP_PLATFORM_HCC__
    )
  # NB: Massive hack. torch/csrc/jit/codegen/fuser/codegen.cpp includes
  # torch/csrc/jit/codegen/fuser/cuda/resource_strings.h which changes the
  # strings depending on if you're __HIP_PLATFORM_HCC__ or not.
  # But that file is in torch_cpu!  So, against all odds, this macro
  # has to be set on torch_cpu too.  I also added it to torch for
  # better luck
  target_compile_definitions(torch_cpu PRIVATE
    USE_ROCM
    __HIP_PLATFORM_HCC__
    )
  target_compile_definitions(torch PRIVATE
    USE_ROCM
    __HIP_PLATFORM_HCC__
    )
  # NOTE(review): hardcoded absolute ROCm install paths; builds with a
  # non-default ROCM_PATH presumably rely on other include dirs — consider
  # deriving these from the ROCm CMake packages instead.
  target_include_directories(torch_hip PRIVATE
    /opt/rocm/include
    /opt/rocm/hcc/include
    /opt/rocm/rocblas/include
    /opt/rocm/hipsparse/include
    )
endif()
| |
if(BUILD_LITE_INTERPRETER)
  target_compile_definitions(torch_cpu PRIVATE BUILD_LITE_INTERPRETER)
  # Enable template selective build only when SELECTED_OP_LIST is provided.
  if(SELECTED_OP_LIST)
    target_compile_definitions(torch_cpu PRIVATE TEMPLATE_SELECTIVE_BUILD)
  endif()
endif()
| |
| |
# Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and
# jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set
if(USE_DISTRIBUTED)
  # PUBLIC so consumers compiling distributed headers see the same macros.
  target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED)
  if(USE_GLOO AND USE_C10D_GLOO)
    target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO)
  endif()
  if(USE_UCC AND USE_C10D_UCC)
    target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC)
    if(USE_CUDA)
      if(BUILD_SPLIT_CUDA)
        target_compile_definitions(torch_cuda_cpp PUBLIC USE_C10D_UCC)
      else()
        target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC)
      endif()
    endif()
  endif()
  if(USE_NCCL AND USE_C10D_NCCL)
    # On ROCm the NCCL backend definitions go onto torch_hip instead.
    if(USE_ROCM)
      target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL)
    else()
      if(BUILD_SPLIT_CUDA)
        target_compile_definitions(torch_cuda_cpp PUBLIC USE_C10D_NCCL)
        if(USE_NCCL_WITH_UCC)
          target_compile_definitions(torch_cuda_cpp PUBLIC USE_NCCL_WITH_UCC)
        endif()
      else()
        target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL)
        if(USE_NCCL_WITH_UCC)
          target_compile_definitions(torch_cuda PUBLIC USE_NCCL_WITH_UCC)
        endif()
      endif()
    endif()
  endif()
  if(USE_MPI AND USE_C10D_MPI)
    # Suppress deprecated-declarations warnings for the MPI process group
    # source on GCC/Clang.
    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
      set_source_files_properties(
          "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp"
          PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
    endif()
    target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI)
  endif()
  # Pass USE_RPC in order to reduce use of
  # #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
  # need to be removed when RPC is supported
  if(NOT WIN32)
    target_compile_definitions(torch_cpu PUBLIC USE_RPC)
  endif()
  # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp
  # can only be compiled with USE_TENSORPIPE is set.
  if(USE_TENSORPIPE)
    target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE)
  endif()
endif()
| |
if(NOT INTERN_BUILD_MOBILE)
  # Whole-archive link of the generated caffe2 protobuf objects so their
  # registration symbols are retained.
  caffe2_interface_library(caffe2_protos caffe2_protos_whole)
  target_link_libraries(torch_cpu PRIVATE caffe2_protos_whole)
  if(${CAFFE2_LINK_LOCAL_PROTOBUF})
    # Locally built protobuf: only consumers need the interface, the
    # objects are already inside caffe2_protos_whole.
    target_link_libraries(torch_cpu INTERFACE protobuf::libprotobuf)
  else()
    target_link_libraries(torch_cpu PUBLIC protobuf::libprotobuf)
  endif()
endif()

# NOTE(review): torch_cpu is already linked PRIVATE against
# OpenMP_CXX_LIBRARIES earlier in this file; this second link is redundant
# but harmless — confirm before removing either one.
if(USE_OPENMP AND OPENMP_FOUND)
  message(STATUS "Caffe2 is compiling with OpenMP. \n"
    "OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
    "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")
  target_link_libraries(torch_cpu PRIVATE ${OpenMP_CXX_LIBRARIES})
endif()
| |
if($ENV{TH_BINARY_BUILD})
  if(NOT MSVC AND USE_CUDA AND NOT APPLE)
    # Note [Extra MKL symbols for MAGMA in torch_cpu]
    #
    # When we build CUDA libraries and link against MAGMA, MAGMA makes use of
    # some BLAS symbols in its CPU fallbacks when it has no GPU versions
    # of kernels. Previously, we ensured the BLAS symbols were filled in by
    # MKL by linking torch_cuda with BLAS, but when we are statically linking
    # against MKL (when we do wheel builds), this actually ends up pulling in a
    # decent chunk of MKL into torch_cuda, inflating our torch_cuda binary
    # size by 8M. torch_cpu exposes most of the MKL symbols we need, but
    # empirically we determined that there are a few which it doesn't provide.
    # If we link torch_cpu with these --undefined symbols, we can ensure they
    # do get pulled in, and then we can avoid statically linking in MKL to
    # torch_cuda at all!
    #
    # We aren't really optimizing for binary size on Windows (and this link
    # line doesn't work on Windows), so don't do it there.
    #
    # These linker commands do not work on OS X, do not attempt this there.
    # (It shouldn't matter anyway, though, because OS X has dropped CUDA support)
    #
    # The list is the single/double (s/d) and complex (c/z) precision pairs
    # of the LAPACK routines MAGMA falls back to.
    # NOTE(review): the list previously contained "daled0", which is not a
    # LAPACK symbol; the double-precision counterpart of slaed0 is dlaed0,
    # so --undefined=mkl_lapack_daled0 could never pull anything in.
    foreach(_symb slaed0 dlaed0 dormql sormql zheevd cheevd)
      string(APPEND _undefined_link_flags " -Wl,--undefined=mkl_lapack_${_symb}")
    endforeach()
    # APPEND_STRING rather than a plain LINK_FLAGS overwrite, so any link
    # flags already set on torch_cpu are preserved; the quoted expansion
    # keeps the whole flag string as one property value.
    set_property(TARGET torch_cpu APPEND_STRING PROPERTY LINK_FLAGS "${_undefined_link_flags}")

  endif()
endif()
| |
# Core link/include wiring for torch_cpu.
target_link_libraries(torch_cpu PUBLIC c10)
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
# Installed consumers see <prefix>/include; build-tree includes are set above.
target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>)
target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE})
target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}")
# Set standard properties on the target
torch_set_target_props(torch_cpu)


# *_BUILD_MAIN_LIB macros flip the dllexport/dllimport direction of the
# corresponding *_API annotations while building each library.
target_compile_options(torch_cpu PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
if(BUILD_SPLIT_CUDA)
  target_compile_options(torch_cuda_cu PRIVATE "-DTORCH_CUDA_CU_BUILD_MAIN_LIB")
  target_compile_options(torch_cuda_cpp PRIVATE "-DTORCH_CUDA_CPP_BUILD_MAIN_LIB")
  # NB: This must be target_compile_definitions, not target_compile_options,
  # as the latter is not respected by nvcc
  target_compile_definitions(torch_cuda_cu PRIVATE "-DTORCH_CUDA_CU_BUILD_MAIN_LIB")
  target_compile_definitions(torch_cuda_cpp PRIVATE "-DTORCH_CUDA_CPP_BUILD_MAIN_LIB")
elseif(USE_CUDA)
  target_compile_options(torch_cuda PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
  # NB: This must be target_compile_definitions, not target_compile_options,
  # as the latter is not respected by nvcc
  target_compile_definitions(torch_cuda PRIVATE "-DTORCH_CUDA_BUILD_MAIN_LIB")
elseif(USE_ROCM)
  target_compile_options(torch_hip PRIVATE "-DTORCH_HIP_BUILD_MAIN_LIB")
  target_compile_definitions(torch_hip PRIVATE "-DTORCH_HIP_BUILD_MAIN_LIB")
endif()
| |
# Opt-in flag for the (experimental) cuDNN v8 frontend API.
if(USE_EXPERIMENTAL_CUDNN_V8_API)
  if(BUILD_SPLIT_CUDA)
    target_compile_definitions(torch_cuda_cu PRIVATE "-DUSE_EXPERIMENTAL_CUDNN_V8_API")
    target_compile_definitions(torch_cuda_cpp PRIVATE "-DUSE_EXPERIMENTAL_CUDNN_V8_API")
  elseif(USE_CUDA)
    target_compile_definitions(torch_cuda PRIVATE "-DUSE_EXPERIMENTAL_CUDNN_V8_API")
  endif()
endif()

set(EXPERIMENTAL_SINGLE_THREAD_POOL "0" CACHE STRING
  "Experimental option to use a single thread pool for inter- and intra-op parallelism")
if("${EXPERIMENTAL_SINGLE_THREAD_POOL}")
  target_compile_definitions(torch_cpu PUBLIC "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
endif()
| |
| if(MSVC AND NOT BUILD_SHARED_LIBS) |
| # Note [Supporting both static and dynamic libraries on Windows] |
| # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| # A Windows library may be distributed as either a static or dynamic |
| # library. The chosen distribution mechanism affects how you setup |
| # the headers for the library: if you statically link a function, |
| # all you need is an ordinary signature: |
| # |
| # void f(); |
| # |
| # But if you *dynamically* link it, then you must provide a __declspec |
| # specifying that it should be imported from a DLL: |
| # |
| # __declspec(dllimport) void f(); |
| # |
| # Mixing the two situations will not work: if you specify dllimport |
| # while statically linking, the linker will complain it cannot find |
| # the __imp_f symbol (which serve as the DLL entrypoint); if you |
| # fail to specify dllimport for a symbol that's coming from a DLL, |
| # the linker will complain that it can't find f. Joy! |
| # |
| # Most places on the Internet, you will find people have written |
| # their headers under the assumption that the application will |
| # only ever be dynamically linked, as they define a macro which |
| # tags a function as __declspec(dllexport) if you are actually |
# building the library, and __declspec(dllimport) otherwise. But
# if you want these headers to also work if you are linking against
# a static library, you need a way to avoid adding these __declspec's
# at all. And that "mechanism" needs to apply to any downstream
# libraries/executables which are going to link against your library.
#
# As an aside, why do we need to support both modes?
# For historical reasons, PyTorch ATen on Windows is built dynamically,
# while Caffe2 on Windows is built statically (mostly because if
# we build it dynamically, we are over the DLL exported symbol limit--and
# that is because Caffe2 hasn't comprehensively annotated all symbols
# which cross the DLL boundary with CAFFE_API). So any code
# which is used by both PyTorch and Caffe2 needs to support both
# modes of linking.
#
# So, you have a macro (call it AT_CORE_STATIC_WINDOWS) which you need to have
# set for any downstream library/executable that transitively includes your
# headers. How are you going to do this? You have two options:
#
# 1. Write out a config.h header which stores whether or not
# you are linking statically or dynamically.
#
# 2. Force all users to set the macro themselves. If they
# use cmake, you can set -DAT_CORE_STATIC_WINDOWS=1 as a PUBLIC
# compile option, in which case cmake will automatically
# add the macro for you.
#
# Which one is better? Well, it depends: they trade off implementor
# ease versus user ease: (1) is more work for the library author
# but the user doesn't have to worry about it; (2) requires the user
# to set the macro themselves... but only if they don't use cmake.
#
# So, which is appropriate in our situation? In my mind, here is
# the distinguishing factor: it is more common to distribute
# DLLs, since they don't require you to line up the CRT version
# (/MD, /MDd, /MT, /MTd) and MSVC version at the use site. So,
# if a user is already in the business of static linkage, they're
# already in "expert user" realm. So, I've decided that at this
# point in time, the simplicity of implementation of (2) wins out.
#
# NB: This must be target_compile_definitions, not target_compile_options,
# as the latter is not respected by nvcc
target_compile_definitions(torch_cpu PUBLIC "AT_CORE_STATIC_WINDOWS=1")
endif()
if(MSVC AND BUILD_SHARED_LIBS)
  # ONNX is linked statically and needs to be exported from this library
  # to be used externally. Make sure that references match the export.
  # NB: This is a preprocessor definition, so declare it with
  # target_compile_definitions rather than smuggling a raw "-D" flag
  # through target_compile_options (definitions are also respected by
  # nvcc, matching the AT_CORE_STATIC_WINDOWS handling above).
  target_compile_definitions(torch_cpu PRIVATE "ONNX_BUILD_MAIN_LIB")
endif()
| |
# Wrap each concrete library target in a companion "<name>_library"
# interface target via caffe2_interface_library (a helper defined outside
# this file); downstream link lines below use the "_library" wrappers.
caffe2_interface_library(torch_cpu torch_cpu_library)

if(USE_CUDA)
  caffe2_interface_library(torch_cuda torch_cuda_library)
  if(BUILD_SPLIT_CUDA)
    # Split-CUDA builds additionally carry separate _cu / _cpp halves.
    caffe2_interface_library(torch_cuda_cu torch_cuda_cu_library)
    caffe2_interface_library(torch_cuda_cpp torch_cuda_cpp_library)
  endif()
elseif(USE_ROCM)
  caffe2_interface_library(torch_hip torch_hip_library)
endif()

caffe2_interface_library(torch torch_library)
| |
# Install the concrete targets and their interface wrappers, registering
# them in the Caffe2Targets export set (consumed by the generated CMake
# package configuration).
install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")

if(USE_CUDA)
  install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
  if(BUILD_SPLIT_CUDA)
    install(TARGETS torch_cuda_cu torch_cuda_cu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
    install(TARGETS torch_cuda_cpp torch_cuda_cpp_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
  endif()
elseif(USE_ROCM)
  install(TARGETS torch_hip torch_hip_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
endif()
install(TARGETS torch torch_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
| |
# The umbrella "torch" target links the per-backend interface wrappers.
target_link_libraries(torch PUBLIC torch_cpu_library)

if(USE_CUDA)
  target_link_libraries(torch PUBLIC torch_cuda_library)
  if(BUILD_SPLIT_CUDA)
    # NS: Library order is important here to prevent cudnn double linking
    target_link_libraries(torch_cuda PUBLIC torch_cuda_cpp_library)
    target_link_libraries(torch_cuda PUBLIC torch_cuda_cu_library)
  endif()
elseif(USE_ROCM)
  target_link_libraries(torch PUBLIC torch_hip_library)
endif()

# Optional configure-time dump of target properties; see the
# PRINT_CMAKE_DEBUG_INFO option declared at the top of this file.
if(PRINT_CMAKE_DEBUG_INFO)
  print_target_properties(torch)
  print_target_properties(torch_cpu)
endif()
| |
# Install PDB files for MSVC builds
# OPTIONAL keeps the install step from failing when a PDB was not produced
# for a given configuration.
if(MSVC AND BUILD_SHARED_LIBS)
  install(FILES $<TARGET_PDB_FILE:torch_cpu> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
  if(BUILD_SPLIT_CUDA)
    install(FILES $<TARGET_PDB_FILE:torch_cuda_cu> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
    install(FILES $<TARGET_PDB_FILE:torch_cuda_cpp> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
  elseif(USE_CUDA)
    install(FILES $<TARGET_PDB_FILE:torch_cuda> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
  elseif(USE_ROCM)
    install(FILES $<TARGET_PDB_FILE:torch_hip> DESTINATION "${TORCH_INSTALL_LIB_DIR}" OPTIONAL)
  endif()
endif()
| |
# ---[ CUDA library.
if(BUILD_SPLIT_CUDA)
  # Split build: the same usage requirements are applied to both the
  # torch_cuda_cu and torch_cuda_cpp halves.
  target_link_libraries(torch_cuda_cu INTERFACE torch::cudart)
  target_link_libraries(torch_cuda_cpp INTERFACE torch::cudart)
  target_link_libraries(torch_cuda_cu PUBLIC c10_cuda torch::nvtoolsext)
  target_link_libraries(torch_cuda_cpp PUBLIC c10_cuda torch::nvtoolsext)

  # Only the installed-package include path is exported to consumers;
  # build-tree GPU includes are attached PRIVATE below.
  target_include_directories(
      torch_cuda_cu INTERFACE $<INSTALL_INTERFACE:include>)
  target_include_directories(
      torch_cuda_cpp INTERFACE $<INSTALL_INTERFACE:include>)
  target_include_directories(
      torch_cuda_cu PRIVATE ${Caffe2_GPU_INCLUDE})
  target_include_directories(
      torch_cuda_cpp PRIVATE ${Caffe2_GPU_INCLUDE})
  target_link_libraries(
      torch_cuda_cu PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
  target_link_libraries(
      torch_cuda_cpp PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
  # The _cu half links the _cpp half (PRIVATE, so it is not re-exported).
  target_link_libraries(torch_cuda_cu PRIVATE torch_cuda_cpp)
  if(USE_CUDNN)
    # PRIVATE so the (presumably non-public) cudnn target does not
    # propagate to consumers -- TODO confirm against cuDNN setup code.
    target_link_libraries(
        torch_cuda_cpp PRIVATE caffe2::cudnn-private)
  endif()

  # These public dependencies must go after the previous dependencies, as the
  # order of the libraries in the linker call matters here when statically
  # linking; libculibos and cublas must be last.
  target_link_libraries(torch_cuda_cpp PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
  target_link_libraries(torch_cuda_cu PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
elseif(USE_CUDA)
  # Monolithic CUDA build: same requirements applied to the single
  # torch_cuda target.
  target_link_libraries(torch_cuda INTERFACE torch::cudart)
  target_link_libraries(torch_cuda PUBLIC c10_cuda torch::nvtoolsext)

  target_include_directories(
      torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
  target_include_directories(
      torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE})
  target_link_libraries(
      torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
  if(USE_CUDNN)
    target_link_libraries(
        torch_cuda PRIVATE caffe2::cudnn-private)
  endif()

  # These public dependencies must go after the previous dependencies, as the
  # order of the libraries in the linker call matters here when statically
  # linking; libculibos and cublas must be last.
  target_link_libraries(torch_cuda PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
endif()
| |
# ---[ Metal(OSX) modification
if(APPLE AND USE_PYTORCH_METAL)
  if(NOT INTERN_BUILD_MOBILE)
    include(../cmake/Metal.cmake)
    # We need to link the system frameworks explicitly
    find_library(metal NAMES Metal)
    find_library(mps NAMES MetalPerformanceShaders)
    find_library(foundation NAMES Foundation)
    find_library(accelerate NAMES Accelerate)
    # NOTE(review): the find_library results are not checked; a missing
    # framework would surface as "<var>-NOTFOUND" on the link line.
    target_link_libraries(torch_cpu PUBLIC ${metal} ${mps} ${foundation} ${accelerate})
  endif()
endif()


# flatbuffers is an internal dependency of torch_cpu and is deliberately
# not propagated to consumers (PRIVATE).
target_link_libraries(torch_cpu PRIVATE flatbuffers)
| |
# Note [Global dependencies]
# Some libraries (e.g. OpenMPI) like to dlopen plugins after they're initialized,
# and they assume that all of their symbols will be available in the global namespace.
# On the other hand we try to be good citizens and avoid polluting the symbol
# namespaces, so libtorch is loaded with all its dependencies in a local scope.
# That usually leads to missing symbol errors at run-time, so to avoid a situation like
# this we have to preload those libs in a global namespace.
#
# torch_global_deps is an otherwise-empty shared library whose only purpose
# is to pull those dependencies in. Nothing links against it, so all its
# dependencies are declared with an explicit PRIVATE keyword rather than
# the legacy plain target_link_libraries signature (plain and keyword
# signatures must not be mixed on one target in CMake).
if(BUILD_SHARED_LIBS)
  add_library(torch_global_deps SHARED ${TORCH_SRC_DIR}/csrc/empty.c)
  if(HAVE_SOVERSION)
    set_target_properties(torch_global_deps PROPERTIES
        VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
  endif()
  # The single source file is C; pin the linker language accordingly.
  set_target_properties(torch_global_deps PROPERTIES LINKER_LANGUAGE C)
  if(USE_MPI)
    target_link_libraries(torch_global_deps PRIVATE ${MPI_CXX_LIBRARIES})
  endif()
  target_link_libraries(torch_global_deps PRIVATE ${MKL_LIBRARIES})
  # The CUDA libraries are linked here for a different reason: in some
  # cases we load these libraries with ctypes, and if they weren't opened
  # with RTLD_GLOBAL, we'll do the "normal" search process again (and
  # not find them, because they're usually in non-standard locations)
  if(USE_CUDA)
    target_link_libraries(torch_global_deps PRIVATE ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
    target_link_libraries(torch_global_deps PRIVATE torch::cudart torch::nvtoolsext)
  endif()
  if(USE_TBB)
    target_link_libraries(torch_global_deps PRIVATE TBB::tbb)
  endif()

  install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
endif()
| |
# ---[ Caffe2 HIP sources.
if(USE_ROCM)
  # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs.
  # Get Compile Definitions from the directory (FindHIP.cmake bug)
  get_directory_property(MY_DEFINITIONS COMPILE_DEFINITIONS)
  if(MY_DEFINITIONS)
    # Re-emit each directory-level definition as an explicit -D flag for
    # the HIP clang driver.
    foreach(_item ${MY_DEFINITIONS})
      list(APPEND HIP_CLANG_FLAGS "-D${_item}")
    endforeach()
  endif()

  # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs.
  hip_include_directories(${Caffe2_HIP_INCLUDE})

  # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
  # PUBLIC: consumers compiling against torch_hip headers need them too.
  target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS})  # experiment
  target_link_libraries(torch_hip PUBLIC c10_hip)

  if(NOT INTERN_BUILD_MOBILE)
    # TODO: Cut this over to ATEN_HIP_FILES_GEN_LIB. At the moment, we
    # only generate CUDA files
    # NB: This dependency must be PRIVATE, because we don't install
    # ATEN_CUDA_FILES_GEN_LIB (it's a synthetic target just to get the
    # correct dependency from generated files.)
    target_link_libraries(torch_hip PRIVATE ATEN_CUDA_FILES_GEN_LIB)
  endif()
  target_link_libraries(torch_hip PUBLIC torch_cpu_library ${Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS})
  target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS})

  # Since PyTorch files contain HIP headers, this is also needed to capture the includes.
  target_include_directories(torch_hip PRIVATE ${Caffe2_HIP_INCLUDE})
  target_include_directories(torch_hip INTERFACE $<INSTALL_INTERFACE:include>)
endif()
| |
# Optional benchmark builds, each gated behind its own BUILD_* flag.
if(BUILD_STATIC_RUNTIME_BENCHMARK)
  add_subdirectory(${TORCH_ROOT}/benchmarks/static_runtime ${PROJECT_BINARY_DIR}/bin)
  add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
  add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
  target_link_libraries(static_runtime_bench torch_library benchmark)
  target_link_libraries(static_runtime_test torch_library gtest_main)
endif()

if(BUILD_TENSOREXPR_BENCHMARK)
  add_subdirectory(${TORCH_ROOT}/benchmarks/cpp/tensorexpr ${CMAKE_BINARY_DIR}/tensorexpr_bench)
endif()

if(BUILD_NVFUSER_BENCHMARK)
  add_subdirectory(${TORCH_ROOT}/benchmarks/cpp/nvfuser ${CMAKE_BINARY_DIR}/nvfuser_bench)
endif()

if(BUILD_CPP_BENCHMARKS)
  add_subdirectory(${TORCH_ROOT}/benchmarks/cpp ${PROJECT_BINARY_DIR}/bin)
endif()
| |
# Mobile benchmark binaries: one executable per source listed in
# ATen_MOBILE_BENCHMARK_SRCS, linked against the benchmark library.
if(BUILD_MOBILE_BENCHMARK)
  foreach(benchmark_src ${ATen_MOBILE_BENCHMARK_SRCS})
    get_filename_component(benchmark_name ${benchmark_src} NAME_WE)
    add_executable(${benchmark_name} "${benchmark_src}")
    target_link_libraries(${benchmark_name} torch_library benchmark)
    target_include_directories(${benchmark_name} PRIVATE $<INSTALL_INTERFACE:include>)
    target_include_directories(${benchmark_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
    target_include_directories(${benchmark_name} PRIVATE ${ATen_CPU_INCLUDE})
    # Tolerate duplicate symbols at link time for these binaries.
    target_link_options(${benchmark_name} PRIVATE "LINKER:--allow-multiple-definition")
  endforeach()
endif()

# Mobile test binaries: one gtest executable per source, each registered
# with CTest via add_test.
if(BUILD_MOBILE_TEST)
  foreach(test_src ${ATen_MOBILE_TEST_SRCS})
    get_filename_component(test_name ${test_src} NAME_WE)
    add_executable(${test_name} "${test_src}")
    target_link_libraries(${test_name} torch_library gtest_main)
    target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
    target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
    target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
  endforeach()
endif()
| |
# ---[ Test binaries.
if(BUILD_TEST)

  # Vectorization (vec) tests: each test source is compiled once per CPU
  # capability, with the matching flags and a CPU_CAPABILITY define.
  # NOTE(review): foreach(RANGE N) is INCLUSIVE of N, i.e. N+1 iterations;
  # this assumes NUM_CPU_CAPABILITY_NAMES is already length-1 -- confirm
  # where it is computed.
  foreach(test_src ${ATen_VEC_TEST_SRCS})
    foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
      get_filename_component(test_name ${test_src} NAME_WE)
      list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
      list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
      separate_arguments(FLAGS UNIX_COMMAND "${FLAGS}")
      # Build vec with minimal dependencies on all platforms but Windows
      if(NOT MSVC)
        add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
        # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
        target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
        if(USE_FBGEMM)
          target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
        endif()
      else()
        add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
        target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main)
      endif()
      target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
      target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE ${ATen_CPU_INCLUDE})
      target_compile_definitions(${test_name}_${CPU_CAPABILITY} PRIVATE CPU_CAPABILITY=${CPU_CAPABILITY} CPU_CAPABILITY_${CPU_CAPABILITY})
      target_compile_options(${test_name}_${CPU_CAPABILITY} PRIVATE ${FLAGS})
      if(NOT MSVC)
        target_compile_options(${test_name}_${CPU_CAPABILITY} PRIVATE -Wno-ignored-qualifiers)
      endif()
      add_test(NAME ${test_name}_${CPU_CAPABILITY} COMMAND $<TARGET_FILE:${test_name}_${CPU_CAPABILITY}>)
    endforeach()
  endforeach()
| |
  # One gtest executable per Caffe2 CPU test source; registered with CTest
  # and optionally installed under test/.
  foreach(test_src ${Caffe2_CPU_TEST_SRCS})
    get_filename_component(test_name ${test_src} NAME_WE)
    add_executable(${test_name} "${test_src}")
    target_link_libraries(${test_name} torch_library gtest_main)
    if(USE_OPENMP)
      # -fopenmp is a compile time flag and as result not guaranteed
      # to link executable against OpenMP runtime library
      target_link_libraries(${test_name} ${OpenMP_CXX_LIBRARIES})
    endif()
    target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
    target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
    target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
    if(NOT MSVC)
      target_compile_options(${test_name} PRIVATE -Wno-unused-variable)
    endif()
    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
    if(INSTALL_TEST)
      install(TARGETS ${test_name} DESTINATION test)
      # Install PDB files for MSVC builds
      if(MSVC AND BUILD_SHARED_LIBS)
        install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
      endif()
    endif()
  endforeach()
| |
  # MPS test binaries, same pattern as the CPU tests above (minus the
  # build-tree include workaround differences).
  if(USE_MPS)
    foreach(test_src ${Caffe2_MPS_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
      target_link_libraries(${test_name} torch_library gtest_main)
      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
      if(INSTALL_TEST)
        install(TARGETS ${test_name} DESTINATION test)
        # Install PDB files for MSVC builds
        if(MSVC AND BUILD_SHARED_LIBS)
          install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
        endif()
      endif()
    endforeach()
  endif()
| |
  # CUDA test binaries, one gtest executable per GPU test source.
  if(USE_CUDA)
    foreach(test_src ${Caffe2_GPU_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
      target_link_libraries(${test_name} torch_library gtest_main)
      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
      if(INSTALL_TEST)
        install(TARGETS ${test_name} DESTINATION test)
        # Install PDB files for MSVC builds
        if(MSVC AND BUILD_SHARED_LIBS)
          install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
        endif()
      endif()
    endforeach()
  endif()
| |
  # Vulkan test binaries, one gtest executable per Vulkan test source.
  if(USE_VULKAN)
    foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
      target_link_libraries(${test_name} torch_library gtest_main)
      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
      if(INSTALL_TEST)
        install(TARGETS ${test_name} DESTINATION test)
        # Install PDB files for MSVC builds
        if(MSVC AND BUILD_SHARED_LIBS)
          install(FILES $<TARGET_PDB_FILE:${test_name}> DESTINATION test OPTIONAL)
        endif()
      endif()
    endforeach()
  endif()
| |
  # HIP/ROCm test binaries; compiled with the same HIP flags as torch_hip.
  if(USE_ROCM)
    foreach(test_src ${Caffe2_HIP_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
      target_link_libraries(${test_name} torch_library gtest_main)
      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
      target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
      add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
      if(INSTALL_TEST)
        install(TARGETS ${test_name} DESTINATION test)
      endif()
    endforeach()
  endif()

  # For special tests that explicitly uses dependencies, we add them here
  # (mpi_test / mpi_gpu_test targets are created elsewhere from the test
  # source lists above).
  if(BUILD_CAFFE2 AND USE_MPI)
    target_link_libraries(mpi_test ${MPI_CXX_LIBRARIES})
    if(USE_CUDA)
      target_link_libraries(mpi_gpu_test ${MPI_CXX_LIBRARIES})
    endif()
  endif()
endif()
| |
| # Note: we only install the caffe2 python files if BUILD_CAFFE2_OPS is ON |
| # This is because the build rules here written in such a way that they always |
| # appear to need to be re-run generating >600 pieces of work during the pytorch |
| # rebuild step. The long-term fix should be to clean up these rules so they |
| # only rerun when needed. |
| |
if(BUILD_PYTHON)
  # Python site-packages
  # Get canonical directory for python site packages (relative to install
  # location). It varies from system to system.
  # We should pin the path separator to the forward slash on Windows.
  # More details can be seen at
  # https://github.com/pytorch/pytorch/tree/master/tools/build_pytorch_libs.bat#note-backslash-munging-on-windows
  pycmd(PYTHON_SITE_PACKAGES "
import os
import sysconfig
relative_site_packages = sysconfig.get_path('purelib').replace(sysconfig.get_path('data'), '').lstrip(os.path.sep)
print(relative_site_packages)
")
  # Quote the input: file(TO_CMAKE_PATH) takes a single path argument, and
  # an unquoted expansion would be split into multiple arguments if the
  # path contains spaces (or list separators).
  file(TO_CMAKE_PATH "${PYTHON_SITE_PACKAGES}" PYTHON_SITE_PACKAGES)
  set(PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES} PARENT_SCOPE) # for Summary
  # ---[ Options.
  set(PYTHON_LIB_REL_PATH "${PYTHON_SITE_PACKAGES}" CACHE STRING "Python installation path (relative to CMake installation prefix)")
  message(STATUS "Using ${PYTHON_LIB_REL_PATH} as python relative installation path")
  # Python extension suffix
  # Try to get from python through sysconfig.get_env_var('EXT_SUFFIX') first,
  # fallback to ".pyd" if windows and ".so" for all others.
  pycmd(PY_EXT_SUFFIX "
def get_ext_suffix():
    import sys
    if sys.version_info < (3, 8) and sys.platform == 'win32':
        # Workaround for https://bugs.python.org/issue39825
        import _imp
        return _imp.extension_suffixes()[0]
    else:
        import sysconfig
        return sysconfig.get_config_var('EXT_SUFFIX')

suffix = get_ext_suffix()
if suffix is not None:
    print(suffix)
else:
    print()
")
  # pycmd printed an empty line (EXT_SUFFIX unavailable): fall back to the
  # conventional per-platform extension suffix.
  if("${PY_EXT_SUFFIX}" STREQUAL "")
    if(MSVC)
      set(PY_EXT_SUFFIX ".pyd")
    else()
      set(PY_EXT_SUFFIX ".so")
    endif()
  endif()
| |
| if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") |
| # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80947 in EmbeddingBag.cpp |
| set_source_files_properties(../aten/src/ATen/native/EmbeddingBag.cpp PROPERTIES COMPILE_FLAGS -Wno-attributes) |
| set_source_files_properties(${TORCH_SRC_DIR}/../caffe2/operators/box_with_nms_limit_op.cc PROPERTIES COMPILE_FLAGS -Wno-attributes) |
| endif() |
| # Allow different install locations for libcaffe2 |
| # For setuptools installs (that all build Python), install libcaffe2 into |
| # site-packages, alongside the torch libraries. The pybind11 library needs |
| # an rpath to the torch library folder |
| # For cmake installs, including c++ only installs, install libcaffe2 into |
| # CMAKE_INSTALL_PREFIX/lib . The pybind11 library can have a hardcoded |
| # rpath |
| set(caffe2_pybind11_rpath "${_rpath_portable_origin}") |
| if(${BUILDING_WITH_TORCH_LIBS}) |
| # site-packages/caffe2/python/caffe2_pybind11_state |
| # site-packages/torch/lib |
| set(caffe2_pybind11_rpath "${_rpath_portable_origin}/../../torch/lib") |
| endif(${BUILDING_WITH_TORCH_LIBS}) |
| |
| # Must also include `CMAKE_SHARED_LINKER_FLAGS` in linker flags for |
| # `caffe2_pybind11_state_*` targets because paths to required libraries may |
| # need to be found there (e.g., specifying path to `libiomp5` with `LDFLAGS`). |
| set(_caffe2_pybind11_state_linker_flags "${CMAKE_SHARED_LINKER_FLAGS}") |
| if(APPLE) |
| set(_caffe2_pybind11_state_linker_flags "${_caffe2_pybind11_state_linker_flags} -undefined dynamic_lookup") |
| endif() |
| |
| # ---[ Python. |
| if(BUILD_CAFFE2) |
| add_library(caffe2_pybind11_state MODULE ${Caffe2_CPU_PYTHON_SRCS}) |
| target_compile_definitions(torch PRIVATE BUILD_CAFFE2) |
| target_compile_definitions(torch_python PRIVATE BUILD_CAFFE2) |
| if(USE_NUMPY) |
| target_compile_options(caffe2_pybind11_state PRIVATE "-DUSE_NUMPY") |
| target_link_libraries(caffe2_pybind11_state PRIVATE numpy::numpy) |
| endif() |
| if(NOT MSVC) |
| set_target_properties(caffe2_pybind11_state PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") |
| endif() |
| torch_set_target_props(caffe2_pybind11_state) |
| set_target_properties(caffe2_pybind11_state PROPERTIES PREFIX "" DEBUG_POSTFIX "") |
| set_target_properties(caffe2_pybind11_state PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) |
| set_target_properties(caffe2_pybind11_state PROPERTIES LINK_FLAGS "${_caffe2_pybind11_state_linker_flags}") |
| target_include_directories(caffe2_pybind11_state PRIVATE $<INSTALL_INTERFACE:include>) |
| target_include_directories(caffe2_pybind11_state PRIVATE ${Caffe2_CPU_INCLUDE}) |
| |
| target_link_libraries(caffe2_pybind11_state PRIVATE |
| torch_library python::python pybind::pybind11) |
| if(USE_MKLDNN) |
| target_link_libraries(caffe2_pybind11_state PRIVATE caffe2::mkldnn) |
| endif() |
| if(WIN32) |
| target_link_libraries(caffe2_pybind11_state PRIVATE onnx_proto) |
| endif(WIN32) |
| |
| # Install caffe2_pybind11_state(_gpu|hip) in site-packages/caffe2/python, |
| # so it needs an rpath to find libcaffe2 |
| set_target_properties( |
| caffe2_pybind11_state PROPERTIES LIBRARY_OUTPUT_DIRECTORY |
| ${CMAKE_BINARY_DIR}/caffe2/python) |
| install(TARGETS caffe2_pybind11_state DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python") |
| if(MSVC AND BUILD_SHARED_LIBS) |
| install(FILES $<TARGET_PDB_FILE:caffe2_pybind11_state> DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python" OPTIONAL) |
| endif() |
| set_target_properties(caffe2_pybind11_state PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}") |
| |
| if(USE_CUDA) |
| add_library(caffe2_pybind11_state_gpu MODULE ${Caffe2_GPU_PYTHON_SRCS}) |
| if(USE_NUMPY) |
| target_compile_options(caffe2_pybind11_state_gpu PRIVATE "-DUSE_NUMPY") |
| target_link_libraries(caffe2_pybind11_state_gpu PRIVATE numpy::numpy) |
| endif() |
| if(NOT MSVC) |
| set_target_properties(caffe2_pybind11_state_gpu PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") |
| endif() |
| torch_set_target_props(caffe2_pybind11_state_gpu) |
| set_target_properties(caffe2_pybind11_state_gpu PROPERTIES PREFIX "" DEBUG_POSTFIX "") |
| set_target_properties(caffe2_pybind11_state_gpu PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) |
| set_target_properties(caffe2_pybind11_state_gpu PROPERTIES LINK_FLAGS "${_caffe2_pybind11_state_linker_flags}") |
| target_include_directories(caffe2_pybind11_state_gpu PRIVATE $<INSTALL_INTERFACE:include>) |
| target_include_directories(caffe2_pybind11_state_gpu PRIVATE ${Caffe2_CPU_INCLUDE}) |
| target_link_libraries(caffe2_pybind11_state_gpu PRIVATE |
| torch_library python::python pybind::pybind11) |
| if(WIN32) |
| target_link_libraries(caffe2_pybind11_state_gpu PRIVATE onnx_proto) |
| endif(WIN32) |
| |
| # Install with same rpath as non-gpu caffe2_pybind11_state |
| set_target_properties( |
| caffe2_pybind11_state_gpu PROPERTIES LIBRARY_OUTPUT_DIRECTORY |
| ${CMAKE_BINARY_DIR}/caffe2/python) |
| install(TARGETS caffe2_pybind11_state_gpu DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python") |
| if(MSVC AND BUILD_SHARED_LIBS) |
| install(FILES $<TARGET_PDB_FILE:caffe2_pybind11_state_gpu> DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python" OPTIONAL) |
| endif() |
| set_target_properties(caffe2_pybind11_state_gpu PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}") |
| endif() |
| |
| if(USE_ROCM) |
| add_library(caffe2_pybind11_state_hip MODULE ${Caffe2_HIP_PYTHON_SRCS}) |
| if(USE_NUMPY) |
| target_compile_options(caffe2_pybind11_state_hip PRIVATE "-DUSE_NUMPY") |
| target_link_libraries(caffe2_pybind11_state_hip PRIVATE numpy::numpy) |
| endif() |
| if(NOT MSVC) |
| target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_CXX_FLAGS} -fvisibility=hidden) |
| endif() |
| torch_set_target_props(caffe2_pybind11_state_hip) |
| set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "") |
| set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) |
| set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINK_FLAGS "${_caffe2_pybind11_state_linker_flags}") |
| target_include_directories(caffe2_pybind11_state_hip PRIVATE $<INSTALL_INTERFACE:include>) |
| target_include_directories(caffe2_pybind11_state_hip PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE}) |
| target_link_libraries(caffe2_pybind11_state_hip PRIVATE |
| torch_library python::python pybind::pybind11) |
| |
| # Install with same rpath as non-hip caffe2_pybind11_state |
| set_target_properties( |
| caffe2_pybind11_state_hip PROPERTIES LIBRARY_OUTPUT_DIRECTORY |
| ${CMAKE_BINARY_DIR}/caffe2/python) |
| install(TARGETS caffe2_pybind11_state_hip DESTINATION "${PYTHON_LIB_REL_PATH}/caffe2/python") |
| set_target_properties(caffe2_pybind11_state_hip PROPERTIES INSTALL_RPATH "${caffe2_pybind11_rpath}") |
| endif() |
| |
| if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio") |
| # If we are building under windows, we will copy the file from |
| # build/caffe2/python/{Debug,Release}/caffe2_pybind11_state.pyd |
| # to its parent folder so that we can do in-build execution. |
| add_custom_target(windows_python_copy_lib ALL) |
| add_dependencies(windows_python_copy_lib caffe2_pybind11_state) |
| add_custom_command( |
| TARGET windows_python_copy_lib POST_BUILD |
| COMMAND ${CMAKE_COMMAND} -E copy |
| $<TARGET_FILE:caffe2_pybind11_state> |
| ${CMAKE_BINARY_DIR}/caffe2/python) |
| if(USE_CUDA) |
| add_dependencies(windows_python_copy_lib caffe2_pybind11_state_gpu) |
| add_custom_command( |
| TARGET windows_python_copy_lib POST_BUILD |
| COMMAND ${CMAKE_COMMAND} -E copy |
| $<TARGET_FILE:caffe2_pybind11_state_gpu> |
| ${CMAKE_BINARY_DIR}/caffe2/python) |
| endif() |
| if(USE_ROCM) |
| add_dependencies(windows_python_copy_lib caffe2_pybind11_state_hip) |
| add_custom_command( |
| TARGET windows_python_copy_lib POST_BUILD |
| COMMAND ${CMAKE_COMMAND} -E copy |
| $<TARGET_FILE:caffe2_pybind11_state_hip> |
| ${CMAKE_BINARY_DIR}/caffe2/python) |
| endif() |
| endif() |
| |
    # Finally, Copy all python files to build directory
    # Create a custom target that copies all python files.
    # NOTE(review): PYTHON_SRCS is consumed below even when BUILD_CAFFE2 is
    # off; it is then undefined, so the copy loop becomes a no-op.
    file(GLOB_RECURSE PYTHON_SRCS RELATIVE ${PROJECT_SOURCE_DIR}
        "${PROJECT_SOURCE_DIR}/caffe2/*.py")
  endif()
| |
| # generated pb files are copied from build/caffe2 to caffe2 |
| # if we copied them back to build this would create a build cycle |
| # consider removing the need for globs |
| filter_list_exclude(PYTHON_SRCS PYTHON_SRCS "proto/.*_pb") |
| |
| set(build_files) |
| foreach(python_src ${PYTHON_SRCS}) |
| add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/${python_src} |
| DEPENDS ${PROJECT_SOURCE_DIR}/${python_src} |
| COMMAND ${CMAKE_COMMAND} -E copy |
| ${PROJECT_SOURCE_DIR}/${python_src} |
| ${CMAKE_BINARY_DIR}/${python_src}) |
| list(APPEND build_files ${CMAKE_BINARY_DIR}/${python_src}) |
| endforeach() |
| |
| add_custom_target(python_copy_files ALL DEPENDS ${build_files}) |
| |
| |
| # Install commands |
| # Pick up static python files |
| install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe2 DESTINATION ${PYTHON_LIB_REL_PATH} |
| FILES_MATCHING PATTERN "*.py") |
| # Caffe proto files |
| install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe DESTINATION ${PYTHON_LIB_REL_PATH} |
| FILES_MATCHING PATTERN "*.py") |
| # Caffe2 proto files |
| install(DIRECTORY ${CMAKE_BINARY_DIR}/caffe2 DESTINATION ${PYTHON_LIB_REL_PATH} |
| FILES_MATCHING PATTERN "*.py") |
| endif() |