# BUILD.bazel - platform/external/pytorch - Git at Google

 load("@bazel_skylib//lib:paths.bzl", "paths")
 load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
 load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
 load("@rules_python//python:defs.bzl", "py_library", "py_test")
 load("@pytorch//third_party:substitution.bzl", "header_template_rule", "template_rule")
 load("@pytorch//:tools/bazel.bzl", "rules")
 load("@pytorch//tools/rules:cu.bzl", "cu_library")
 load("@pytorch//tools/config:defs.bzl", "if_cuda")
 load("@pytorch//:aten.bzl", "generate_aten", "intern_build_aten_ops")
 load(":build.bzl", "GENERATED_AUTOGRAD_CPP", "GENERATED_AUTOGRAD_PYTHON", "define_targets")
 load(":build_variables.bzl", "jit_core_sources", "lazy_tensor_ts_sources", "libtorch_core_sources", "libtorch_cuda_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "libtorch_python_core_sources", "torch_cpp_srcs", "libtorch_python_cuda_sources", "libtorch_python_distributed_sources")
 load(":ufunc_defs.bzl", "aten_ufunc_generated_cpu_kernel_sources", "aten_ufunc_generated_cpu_sources", "aten_ufunc_generated_cuda_sources")
 load("//:tools/bazel.bzl", "rules")

 # Call the `define_targets` macro loaded from :build.bzl, handing it the
 # `rules` object loaded above (presumably this declares targets shared with
 # other build configurations -- confirm in build.bzl).
 define_targets(
     rules = rules,
 )

 # Preprocessor defines shared by ATEN_COPTS, CAFFE2_COPTS, TORCH_COPTS and the
 # torch_python target. CUDA-only defines are appended through if_cuda().
 COMMON_COPTS = [
     "-DHAVE_MALLOC_USABLE_SIZE=1",
     "-DHAVE_MMAP=1",
     "-DHAVE_SHM_OPEN=1",
     "-DHAVE_SHM_UNLINK=1",
     "-D_FILE_OFFSET_BITS=64",
     "-DUSE_FBGEMM",
     "-DUSE_DISTRIBUTED",
     "-DAT_PER_OPERATOR_HEADERS",
     "-DATEN_THREADING=NATIVE",
     "-DNO_CUDNN_DESTROY_HANDLE",
 ] + if_cuda(
     [
         "-DUSE_CUDA",
         "-DUSE_CUDNN",
         # Use cub in a safe manner by wrapping it in a dedicated namespace;
         # see https://github.com/pytorch/pytorch/pull/55292
         # TODO: This should be passed only when building for CUDA-11.5 or
         # newer.
         "-DCUB_WRAPPED_NAMESPACE=at_cuda_detail",
     ],
 )

 # Inputs of the `generated_aten_cpp` code-generation target: the two operator
 # YAML files plus everything under the ATen templates directory.
 aten_generation_srcs = [
     "aten/src/ATen/native/native_functions.yaml",
     "aten/src/ATen/native/tags.yaml",
 ] + glob(["aten/src/ATen/templates/**"])

 # Files produced by `generated_aten_cpp` that are compiled into the CPU `aten`
 # library. Listed in two groups purely for readability; the concatenation
 # preserves the original order.
 generated_cpu_cpp = [
     # Register*.cpp registration sources.
     "aten/src/ATen/RegisterBackendSelect.cpp",
     "aten/src/ATen/RegisterCPU.cpp",
     "aten/src/ATen/RegisterFunctionalization_0.cpp",
     "aten/src/ATen/RegisterFunctionalization_1.cpp",
     "aten/src/ATen/RegisterFunctionalization_2.cpp",
     "aten/src/ATen/RegisterFunctionalization_3.cpp",
     # "aten/src/ATen/RegisterFunctionalizationEverything.cpp",
     "aten/src/ATen/RegisterMkldnnCPU.cpp",
     "aten/src/ATen/RegisterNestedTensorCPU.cpp",
     "aten/src/ATen/RegisterQuantizedCPU.cpp",
     "aten/src/ATen/RegisterSparseCPU.cpp",
     "aten/src/ATen/RegisterSparseCsrCPU.cpp",
     "aten/src/ATen/RegisterZeroTensor.cpp",
     "aten/src/ATen/RegisterCompositeImplicitAutograd.cpp",
     "aten/src/ATen/RegisterCompositeImplicitAutogradNestedTensor.cpp",
     "aten/src/ATen/RegisterCompositeExplicitAutograd.cpp",
     "aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp",
     "aten/src/ATen/RegisterMeta.cpp",
     "aten/src/ATen/RegisterSparseMeta.cpp",
     "aten/src/ATen/RegisterQuantizedMeta.cpp",
     "aten/src/ATen/RegisterNestedTensorMeta.cpp",
     "aten/src/ATen/RegisterSchema.cpp",
 ] + [
     # Generated headers and the remaining generated sources.
     "aten/src/ATen/CPUFunctions.h",
     "aten/src/ATen/CPUFunctions_inl.h",
     "aten/src/ATen/CompositeExplicitAutogradFunctions.h",
     "aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
     "aten/src/ATen/CompositeExplicitAutogradNonFunctionalFunctions.h",
     "aten/src/ATen/CompositeExplicitAutogradNonFunctionalFunctions_inl.h",
     "aten/src/ATen/CompositeImplicitAutogradFunctions.h",
     "aten/src/ATen/CompositeImplicitAutogradFunctions_inl.h",
     "aten/src/ATen/CompositeImplicitAutogradNestedTensorFunctions.h",
     "aten/src/ATen/CompositeImplicitAutogradNestedTensorFunctions_inl.h",
     "aten/src/ATen/CompositeViewCopyKernels.cpp",
     "aten/src/ATen/FunctionalInverses.h",
     "aten/src/ATen/Functions.h",
     "aten/src/ATen/Functions.cpp",
     "aten/src/ATen/RedispatchFunctions.h",
     "aten/src/ATen/Operators.h",
     "aten/src/ATen/Operators_0.cpp",
     "aten/src/ATen/Operators_1.cpp",
     "aten/src/ATen/Operators_2.cpp",
     "aten/src/ATen/Operators_3.cpp",
     "aten/src/ATen/Operators_4.cpp",
     "aten/src/ATen/NativeFunctions.h",
     "aten/src/ATen/MetaFunctions.h",
     "aten/src/ATen/MetaFunctions_inl.h",
     "aten/src/ATen/MethodOperators.h",
     "aten/src/ATen/NativeMetaFunctions.h",
     "aten/src/ATen/RegistrationDeclarations.h",
     "aten/src/ATen/VmapGeneratedPlumbing.h",
     "aten/src/ATen/core/aten_interned_strings.h",
     "aten/src/ATen/core/enum_tag.h",
     "aten/src/ATen/core/TensorBody.h",
     "aten/src/ATen/core/TensorMethods.cpp",
     "aten/src/ATen/core/ATenOpList.cpp",
 ]

 # Files produced by `generated_aten_cpp` that are compiled into the
 # `aten_cuda_cpp` library. All of them live directly under aten/src/ATen/.
 generated_cuda_cpp = [
     "aten/src/ATen/" + basename
     for basename in [
         "CUDAFunctions.h",
         "CUDAFunctions_inl.h",
         "RegisterCUDA.cpp",
         "RegisterNestedTensorCUDA.cpp",
         "RegisterQuantizedCUDA.cpp",
         "RegisterSparseCUDA.cpp",
         "RegisterSparseCsrCUDA.cpp",
     ]
 ]

 # Runs //torchgen:gen over the ATen YAML/templates to emit every generated
 # ATen file: the CPU and CUDA lists above, the ufunc sources, and
 # Declarations.yaml.
 generate_aten(
     name = "generated_aten_cpp",
     srcs = aten_generation_srcs,
     outs = (
         generated_cpu_cpp + generated_cuda_cpp +
         aten_ufunc_generated_cpu_sources("aten/src/ATen/{}") +
         aten_ufunc_generated_cpu_kernel_sources("aten/src/ATen/{}") +
         aten_ufunc_generated_cuda_sources("aten/src/ATen/{}") +
         ["aten/src/ATen/Declarations.yaml"]
     ),
     generator = "//torchgen:gen",
 )

 # Groups the generated autograd C++ files under a single label; the
 # `generate-code` target is attached as a data dependency.
 filegroup(
     name = "cpp_generated_code",
     data = [":generate-code"],
     srcs = GENERATED_AUTOGRAD_CPP,
 )

 # ATen
 # Non-recursive set of .cpp files from the ATen root and its functorch,
 # detail and cpu subdirectories.
 filegroup(
     name = "aten_base_cpp",
     srcs = glob(
         [
             "aten/src/ATen/*.cpp",
             "aten/src/ATen/functorch/*.cpp",
             "aten/src/ATen/detail/*.cpp",
             "aten/src/ATen/cpu/*.cpp",
         ],
     ),
 )

 # Every .cpp file below aten/src/ATen/core (recursively), minus *_test.cpp.
 filegroup(
     name = "ATen_CORE_SRCS",
     srcs = glob(
         ["aten/src/ATen/core/**/*.cpp"],
         exclude = ["aten/src/ATen/core/**/*_test.cpp"],
     ),
 )

 # Per-directory source groups for aten/src/ATen/native. Each glob is
 # non-recursive, so subdirectories need their own filegroup.
 filegroup(
     name = "aten_native_cpp",
     srcs = glob(
         ["aten/src/ATen/native/*.cpp"],
     ),
 )

 filegroup(
     name = "aten_native_sparse_cpp",
     srcs = glob(
         ["aten/src/ATen/native/sparse/*.cpp"],
     ),
 )

 filegroup(
     name = "aten_native_nested_cpp",
     srcs = glob(
         ["aten/src/ATen/native/nested/*.cpp"],
     ),
 )

 # Quantized kernels: the top-level directory plus its cpu/ subdirectory.
 filegroup(
     name = "aten_native_quantized_cpp",
     srcs = glob([
         "aten/src/ATen/native/quantized/*.cpp",
         "aten/src/ATen/native/quantized/cpu/*.cpp",
     ]),
 )

 filegroup(
     name = "aten_native_transformers_cpp",
     srcs = glob(
         ["aten/src/ATen/native/transformers/*.cpp"],
     ),
 )

 # MKL sources come from both the native/mkl and the top-level mkl directory.
 filegroup(
     name = "aten_native_mkl_cpp",
     srcs = glob(
         [
             "aten/src/ATen/native/mkl/*.cpp",
             "aten/src/ATen/mkl/*.cpp",
         ],
     ),
 )

 filegroup(
     name = "aten_native_mkldnn_cpp",
     srcs = glob(
         ["aten/src/ATen/native/mkldnn/*.cpp"],
     ),
 )

 filegroup(
     name = "aten_native_xnnpack",
     srcs = glob(
         ["aten/src/ATen/native/xnnpack/*.cpp"],
     ),
 )

 # Backend source groups taken from the ATen vulkan/ and metal/ directories.
 filegroup(
     name = "aten_base_vulkan",
     srcs = glob(
         ["aten/src/ATen/vulkan/*.cpp"],
     ),
 )

 filegroup(
     name = "aten_base_metal",
     srcs = glob(
         ["aten/src/ATen/metal/*.cpp"],
     ),
 )

 # Every .cpp file below aten/src/ATen/quantized (recursively), minus
 # *_test.cpp.
 filegroup(
     name = "ATen_QUANTIZED_SRCS",
     srcs = glob(
         ["aten/src/ATen/quantized/**/*.cpp"],
         exclude = ["aten/src/ATen/quantized/**/*_test.cpp"],
     ),
 )

 # Host-side (.cpp) sources of the CUDA/cuDNN/MIOpen parts of ATen; consumed
 # by the `aten_cuda_cpp` library.
 filegroup(
     name = "aten_cuda_cpp_srcs",
     srcs = glob([
         "aten/src/ATen/cuda/*.cpp",
         "aten/src/ATen/cuda/detail/*.cpp",
         "aten/src/ATen/cuda/tunable/*.cpp",
         "aten/src/ATen/cudnn/*.cpp",
         "aten/src/ATen/native/cuda/*.cpp",
         "aten/src/ATen/native/cuda/linalg/*.cpp",
         "aten/src/ATen/native/cudnn/*.cpp",
         "aten/src/ATen/native/miopen/*.cpp",
         "aten/src/ATen/native/nested/cuda/*.cpp",
         "aten/src/ATen/native/quantized/cuda/*.cpp",
         "aten/src/ATen/native/quantized/cudnn/*.cpp",
         "aten/src/ATen/native/sparse/cuda/*.cpp",
         "aten/src/ATen/native/transformers/cuda/*.cpp",
     ]),
 )

 # Device-side (.cu) sources of ATen plus the generated ufunc CUDA sources;
 # consumed by the `aten_cuda` cu_library.
 filegroup(
     name = "aten_cu_srcs",
     # NOTE: the generated ufunc sources are referenced by path only. It is
     # a bit puzzling why it's not necessary to declare the target that
     # generates these sources...
     srcs = glob(
         [
             "aten/src/ATen/cuda/*.cu",
             "aten/src/ATen/cuda/detail/*.cu",
             "aten/src/ATen/native/cuda/*.cu",
             "aten/src/ATen/native/nested/cuda/*.cu",
             "aten/src/ATen/native/quantized/cuda/*.cu",
             "aten/src/ATen/native/sparse/cuda/*.cu",
             "aten/src/ATen/native/transformers/cuda/*.cu",
         ],
     ) + aten_ufunc_generated_cuda_sources("aten/src/ATen/{}"),
 )

 # Generates aten/src/ATen/Config.h from Config.h.in by replacing each
 # @PLACEHOLDER@ below with the fixed value chosen for this build.
 header_template_rule(
     name = "aten_src_ATen_config",
     out = "aten/src/ATen/Config.h",
     src = "aten/src/ATen/Config.h.in",
     include = "aten/src",
     substitutions = {
         # MKL / MKL-DNN
         "@AT_MKLDNN_ENABLED@": "1",
         "@AT_MKLDNN_ACL_ENABLED@": "0",
         "@AT_MKL_ENABLED@": "1",
         "@AT_MKL_SEQUENTIAL@": "0",
         # Optional backends
         "@AT_POCKETFFT_ENABLED@": "0",
         "@AT_NNPACK_ENABLED@": "0",
         "@CAFFE2_STATIC_LINK_CUDA_INT@": "0",
         # BLAS / LAPACK
         "@AT_BUILD_WITH_BLAS@": "1",
         "@AT_BUILD_WITH_LAPACK@": "1",
         # Threading
         "@AT_PARALLEL_OPENMP@": "0",
         "@AT_PARALLEL_NATIVE@": "1",
         # BLAS calling conventions
         "@AT_BLAS_F2C@": "0",
         "@AT_BLAS_USE_CBLAS_DOT@": "1",
     },
 )

 # Generates aten/src/ATen/cuda/CUDAConfig.h from CUDAConfig.h.in with the
 # fixed substitutions below.
 header_template_rule(
     name = "aten_src_ATen_cuda_config",
     out = "aten/src/ATen/cuda/CUDAConfig.h",
     src = "aten/src/ATen/cuda/CUDAConfig.h.in",
     include = "aten/src",
     substitutions = {
         "@AT_CUDNN_ENABLED@": "1",
         "@AT_CUSPARSELT_ENABLED@": "0",
         "@AT_ROCM_ENABLED@": "0",
         "@AT_MAGMA_ENABLED@": "0",
         # No extra NVCC flags are recorded in the generated header.
         "@NVCC_FLAGS_EXTRA@": "",
     },
 )

 # Header-only view of ATen: checked-in headers under aten/src, two torch/csrc
 # headers, the generated Config.h and the generated ATen files.
 cc_library(
     name = "aten_headers",
     hdrs = [
         "torch/csrc/Export.h",
         "torch/csrc/jit/frontend/function_schema_parser.h",
     ] + glob([
         "aten/src/**/*.h",
         "aten/src/**/*.hpp",
         "aten/src/ATen/cuda/**/*.cuh",
         "aten/src/ATen/native/**/*.cuh",
         "aten/src/THC/*.cuh",
     ]) + [
         # Generated inputs, referenced by target label.
         ":aten_src_ATen_config",
         ":generated_aten_cpp",
     ],
     includes = ["aten/src"],
     deps = ["//c10"],
 )

 # Compiler options for the ATen libraries: the shared defines followed by
 # ATen-specific defines and code-generation flags.
 ATEN_COPTS = COMMON_COPTS + [
     # Defines
     "-DCAFFE2_BUILD_MAIN_LIBS",
     "-DHAVE_AVX_CPU_DEFINITION",
     "-DHAVE_AVX2_CPU_DEFINITION",
 ] + [
     # Code-generation flags
     "-fvisibility-inlines-hidden",
     "-fno-math-errno",
     "-fno-trapping-math",
 ]

 # Invoke the `intern_build_aten_ops` macro from aten.bzl with the ATen copts,
 # the generated ufunc CPU kernel sources as extra implementations, and the
 # libraries those kernels depend on.
 intern_build_aten_ops(
     deps = [
         ":aten_headers",
         "@fbgemm",
         "@mkl",
         "@sleef",
     ],
     copts = ATEN_COPTS,
     extra_impls = aten_ufunc_generated_cpu_kernel_sources("aten/src/ATen/{}"),
 )

 # The CPU ATen library: all checked-in ATen source groups plus the generated
 # CPU registration/operator sources and the generated ufunc CPU sources.
 cc_library(
     name = "aten",
     srcs = [
         ":ATen_CORE_SRCS",
         ":ATen_QUANTIZED_SRCS",
         ":aten_base_cpp",
         ":aten_base_metal",
         ":aten_base_vulkan",
         ":aten_native_cpp",
         ":aten_native_mkl_cpp",
         ":aten_native_mkldnn_cpp",
         ":aten_native_nested_cpp",
         ":aten_native_quantized_cpp",
         ":aten_native_sparse_cpp",
         ":aten_native_transformers_cpp",
         ":aten_native_xnnpack",
         ":aten_src_ATen_config",
     ] + generated_cpu_cpp + aten_ufunc_generated_cpu_sources("aten/src/ATen/{}"),
     copts = ATEN_COPTS,
     # The NVRTC stub shared library is only shipped as runtime data for CUDA
     # builds.
     data = if_cuda([":libcaffe2_nvrtc.so"], []),
     linkopts = ["-ldl"],
     visibility = ["//visibility:public"],
     deps = [
         ":ATen_CPU",
         ":aten_headers",
         ":caffe2_for_aten_headers",
         ":torch_headers",
         "@fbgemm",
         "@ideep",
     ],
     alwayslink = True,
 )

 # Static library built from the sources in aten/src/ATen/cuda/nvrtc_stub;
 # wrapped into libcaffe2_nvrtc.so by the cc_binary target of that name.
 cc_library(
     name = "aten_nvrtc",
     srcs = glob(["aten/src/ATen/cuda/nvrtc_stub/*.cpp"]),
     copts = ATEN_COPTS,
     visibility = ["//visibility:public"],
     deps = [
         ":aten_headers",
         "//c10",
         "@cuda",
         "@cuda//:cuda_driver",
         "@cuda//:nvrtc",
     ],
     alwayslink = True,
     linkstatic = True,
 )

 # Shared-library packaging of `aten_nvrtc`; referenced as runtime data by the
 # `aten` library in CUDA builds.
 cc_binary(
     name = "libcaffe2_nvrtc.so",
     deps = [":aten_nvrtc"],
     linkshared = True,
     visibility = ["//visibility:public"],
 )

 # Host-side CUDA part of ATen: the checked-in CUDA .cpp sources plus the
 # generated CUDA registration files, exposing the generated CUDAConfig.h.
 cc_library(
     name = "aten_cuda_cpp",
     hdrs = [":aten_src_ATen_cuda_config"],
     srcs = [":aten_cuda_cpp_srcs"] + generated_cuda_cpp,
     copts = ATEN_COPTS,
     deps = [
         ":aten",
         "@cuda",
         "@cuda//:cusolver",
         "@cuda//:nvrtc",
         "@cudnn",
         "@cudnn_frontend",
     ],
     visibility = ["//visibility:public"],
     alwayslink = True,
 )

 # Extra defines passed to the cu_library targets (`aten_cuda`, `torch_cuda`).
 torch_cuda_half_options = [
     "-DCUDA_HAS_FP16=1",
 ] + [
     # `__CUDA_NO_*__` switches (presumably they suppress the CUDA headers'
     # own half/bfloat16 operators and conversions -- confirm against the
     # CUDA fp16/bf16 headers).
     "-D__CUDA_NO_HALF_OPERATORS__",
     "-D__CUDA_NO_HALF_CONVERSIONS__",
     "-D__CUDA_NO_BFLOAT16_CONVERSIONS__",
     "-D__CUDA_NO_HALF2_OPERATORS__",
 ]

 # Device-side CUDA part of ATen: compiles the .cu sources on top of
 # `aten_cuda_cpp`.
 cu_library(
     name = "aten_cuda",
     srcs = [":aten_cu_srcs"],
     copts = ATEN_COPTS + torch_cuda_half_options,
     deps = [
         ":aten_cuda_cpp",
         "//c10/util:bit_cast",
         "@cuda//:cublas",
         "@cuda//:cufft",
         "@cuda//:cusparse",
         "@cutlass",
     ],
     visibility = ["//visibility:public"],
     alwayslink = True,
 )

 # caffe2
 # Compiler options for the caffe2 libraries: the shared defines followed by
 # caffe2-specific defines and code-generation flags.
 CAFFE2_COPTS = COMMON_COPTS + [
     # Defines
     "-Dcaffe2_EXPORTS",
     "-DCAFFE2_USE_CUDNN",
     "-DCAFFE2_BUILD_MAIN_LIB",
 ] + [
     # Code-generation flags
     "-fvisibility-inlines-hidden",
     "-fno-math-errno",
     "-fno-trapping-math",
 ]

 # Explicitly listed caffe2 sources, grouped by directory; all of these groups
 # are compiled into the `caffe2` library.
 filegroup(
     name = "caffe2_core_srcs",
     srcs = ["caffe2/core/common.cc"],
 )

 filegroup(
     name = "caffe2_perfkernels_srcs",
     srcs = ["caffe2/perfkernels/embedding_lookup_idx.cc"],
 )


 # Sources under caffe2/serialize that go into the `caffe2` library.
 filegroup(
     name = "caffe2_serialize_srcs",
     srcs = [
         "caffe2/serialize/" + basename
         for basename in [
             "file_adapter.cc",
             "inline_container.cc",
             "istream_adapter.cc",
             "read_adapter_interface.cc",
         ]
     ],
 )

 # Sources under caffe2/utils (including the threadpool subdirectory) that go
 # into the `caffe2` library.
 filegroup(
     name = "caffe2_utils_srcs",
     srcs = [
         "caffe2/utils/" + relative_path
         for relative_path in [
             "proto_wrap.cc",
             "string_utils.cc",
             "threadpool/ThreadPool.cc",
             "threadpool/pthreadpool.cc",
             "threadpool/pthreadpool_impl.cc",
             "threadpool/thread_pool_guard.cpp",
         ]
     ],
 )

 # To get finer granularity and make debugging easier, caffe2 is split into
 # three libraries: ATen, caffe2 and caffe2_for_aten_headers. The ATen library
 # groups the source code under the aten/ directory, and caffe2 contains most
 # files under the `caffe2/` directory. Because the ATen and caffe2 libraries
 # would otherwise depend on each other, `caffe2_for_aten_headers` is split out
 # of `caffe2` to avoid a dependency cycle.
 cc_library(
     name = "caffe2_for_aten_headers",
     hdrs = glob(["caffe2/utils/threadpool/*.h"]) and [
         "caffe2/core/common.h",
         "caffe2/perfkernels/common.h",
         "caffe2/perfkernels/embedding_lookup_idx.h",
         "caffe2/utils/fixed_divisor.h",
     ] + glob(["caffe2/utils/threadpool/*.h"]) or [
         "caffe2/core/common.h",
         "caffe2/perfkernels/common.h",
         "caffe2/perfkernels/embedding_lookup_idx.h",
         "caffe2/utils/fixed_divisor.h",
     ],
     copts = CAFFE2_COPTS,
     deps = [
         ":caffe2_core_macros",
         "//c10",
     ],
     visibility = ["//visibility:public"],
 )

 # Headers of the caffe2 library (perfkernels, serialize, utils, threadpool and
 # modules), plus every caffe2 .cuh file in CUDA builds.
 cc_library(
     name = "caffe2_headers",
     hdrs = glob(
         [
             "caffe2/perfkernels/*.h",
             "caffe2/serialize/*.h",
             "caffe2/utils/*.h",
             "caffe2/utils/threadpool/*.h",
             "modules/**/*.h",
         ],
         # NOTE(review): none of the include patterns above reaches
         # caffe2/core, so this exclude currently filters nothing.
         exclude = ["caffe2/core/macros.h"],
     ) + if_cuda(
         glob(["caffe2/**/*.cuh"]),
     ),
     copts = CAFFE2_COPTS,
     deps = [
         ":caffe2_core_macros",
         ":caffe2_for_aten_headers",
     ],
     visibility = ["//visibility:public"],
 )

 # The caffe2 library: the explicitly listed core/perfkernels/serialize/utils
 # sources, linked against the AVX/AVX2 perfkernels and either the CUDA or the
 # CPU flavour of ATen and tensorpipe.
 cc_library(
     name = "caffe2",
     srcs = [
         ":caffe2_core_srcs",
         ":caffe2_perfkernels_srcs",
         ":caffe2_serialize_srcs",
         ":caffe2_utils_srcs",
     ],
     # -mf16c is added on top of the shared caffe2 options for this target.
     copts = CAFFE2_COPTS + ["-mf16c"],
     # Boolean attribute written as a boolean, consistent with the other
     # `linkstatic` / `alwayslink` uses in this file.
     linkstatic = True,
     visibility = ["//visibility:public"],
     deps = [
         ":caffe2_core_macros",
         ":caffe2_headers",
         ":caffe2_perfkernels_avx",
         ":caffe2_perfkernels_avx2",
         "//third_party/miniz-2.1.0:miniz",
         "@com_google_protobuf//:protobuf",
         "@eigen",
         "@fbgemm//:fbgemm_src_headers",
         "@fmt",
         "@onnx",
     ] + if_cuda(
         # CUDA build
         [
             ":aten_cuda",
             "@tensorpipe//:tensorpipe_cuda",
         ],
         # CPU-only build
         [
             ":aten",
             "@tensorpipe//:tensorpipe_cpu",
         ],
     ),
     alwayslink = True,
 )

 # CUDA (.cu) sources of torch's distributed c10d code. The same files are
 # excluded from the `torch` cc_library's CUDA glob.
 cu_library(
     name = "torch_cuda",
     srcs = [
         "torch/csrc/distributed/c10d/intra_node_comm.cu",
         "torch/csrc/distributed/c10d/NCCLUtils.cu",
         "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
     ],
     copts = torch_cuda_half_options,
     deps = [
         ":aten",
         "@cuda//:cublas",
         "@cuda//:curand",
         "@cudnn",
         "@eigen",
         "@tensorpipe//:tensorpipe_cuda",
     ],
     visibility = ["//visibility:public"],
     alwayslink = True,
 )

 # Compiler options for the caffe2 perfkernels libraries. Unlike the other
 # copts lists this one does not build on COMMON_COPTS.
 PERF_COPTS = [
     # Defines
     "-DHAVE_AVX_CPU_DEFINITION",
     "-DHAVE_AVX2_CPU_DEFINITION",
     "-DENABLE_ALIAS=1",
     "-DHAVE_MALLOC_USABLE_SIZE=1",
     "-DHAVE_MMAP=1",
     "-DHAVE_SHM_OPEN=1",
     "-DHAVE_SHM_UNLINK=1",
     "-DSLEEF_STATIC_LIBS=1",
     # NOTE(review): "BALS" looks like a typo for "BLAS" (TH_BLAS_MKL). Left
     # unchanged because correcting it would start defining a different macro;
     # confirm whether any compiled source checks either spelling.
     "-DTH_BALS_MKL",
     "-D_FILE_OFFSET_BITS=64",
     "-DUSE_FBGEMM",
 ] + [
     # Code-generation and warning flags
     "-fvisibility-inlines-hidden",
     "-Wunused-parameter",
     "-fno-math-errno",
     "-fno-trapping-math",
     "-mf16c",
 ]

 # Headers exposed by both perfkernels libraries.
 PERF_HEADERS = glob(
     [
         "caffe2/perfkernels/*.h",
         "caffe2/core/*.h",
     ],
 )

 # perfkernels sources named *_avx.cc, compiled with -mavx.
 cc_library(
     name = "caffe2_perfkernels_avx",
     srcs = glob(["caffe2/perfkernels/*_avx.cc"]),
     hdrs = PERF_HEADERS,
     copts = PERF_COPTS + ["-mavx"],
     deps = [
         ":caffe2_headers",
         "//c10",
     ],
     visibility = ["//visibility:public"],
     alwayslink = True,
 )

 # perfkernels sources named *_avx2.cc, compiled with AVX2, FMA and AVX
 # enabled.
 cc_library(
     name = "caffe2_perfkernels_avx2",
     srcs = glob(["caffe2/perfkernels/*_avx2.cc"]),
     hdrs = PERF_HEADERS,
     copts = PERF_COPTS + [
         "-mavx2",
         "-mfma",
         "-mavx",
     ],
     deps = [
         ":caffe2_headers",
         "//c10",
     ],
     visibility = ["//visibility:public"],
     alwayslink = True,
 )

 # torch
 # Headers directly under torch/csrc/cuda; `torch_headers` only exposes them
 # in CUDA builds.
 torch_cuda_headers = glob(
     ["torch/csrc/cuda/*.h"],
 )

 # Header view of torch: checked-in headers (CUDA ones only in CUDA builds),
 # the generated autograd C++ files and the generated version header.
 cc_library(
     name = "torch_headers",
     hdrs = if_cuda(torch_cuda_headers) + glob(
         [
             "torch/*.h",
             "torch/csrc/**/*.h",
             "torch/csrc/distributed/c10d/**/*.hpp",
             "torch/lib/libshm/*.h",
         ],
         # Generated headers are added explicitly below; the CUDA headers are
         # excluded here because they are contributed through if_cuda() above.
         exclude = ["torch/csrc/*/generated/*.h"] + torch_cuda_headers,
     ) + GENERATED_AUTOGRAD_CPP + [":version_h"],
     includes = [
         "third_party/kineto/libkineto/include",
         "torch/csrc",
         "torch/csrc/api/include",
         "torch/csrc/distributed",
         "torch/lib",
         "torch/lib/libshm",
     ],
     deps = [
         ":aten_headers",
         ":caffe2_headers",
         "//c10",
         "@com_github_google_flatbuffers//:flatbuffers",
         "@local_config_python//:python_headers",
         "@onnx",
     ],
     visibility = ["//visibility:public"],
     alwayslink = True,
 )

 # Compiler options for the `torch` library: the shared defines followed by
 # torch-specific defines and code-generation / warning flags.
 TORCH_COPTS = COMMON_COPTS + [
     "-Dtorch_EXPORTS",
     "-DHAVE_AVX_CPU_DEFINITION",
     "-DHAVE_AVX2_CPU_DEFINITION",
     "-DCAFFE2_USE_GLOO",
     "-fvisibility-inlines-hidden",
     "-fno-math-errno",
     "-fno-trapping-math",
     "-Wno-error=unused-function",
 ]

 # De-duplicated list of every source compiled into the `torch` library. The
 # source lists are merged through dict keys, which drops repeated entries;
 # the dict values are unused.
 torch_sources = {
     source_path: ""
     for source_path in (
         libtorch_core_sources + libtorch_distributed_sources +
         torch_cpp_srcs + libtorch_extra_sources +
         jit_core_sources + lazy_tensor_ts_sources +
         GENERATED_AUTOGRAD_CPP
     )
 }.keys()

 # The main torch library: the de-duplicated torch sources, plus the CUDA
 # sources in CUDA builds.
 cc_library(
     name = "torch",
     srcs = if_cuda(
         glob(
             libtorch_cuda_sources,
             exclude = [
                 "torch/csrc/cuda/python_nccl.cpp",
                 "torch/csrc/cuda/nccl.cpp",
                 # .cu files are not compiled by this cc_library.
                 "torch/csrc/distributed/c10d/intra_node_comm.cu",
                 "torch/csrc/distributed/c10d/CUDASymmetricMemory.cu",
                 "torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu",
                 "torch/csrc/distributed/c10d/NCCLUtils.cu",
                 "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
             ],
         ),
     ) + torch_sources,
     copts = TORCH_COPTS,
     defines = ["CAFFE2_NIGHTLY_VERSION=20200115"],
     linkopts = ["-lrt"],
     visibility = ["//visibility:public"],
     deps = [
         ":caffe2",
         ":torch_headers",
         "@kineto",
         "@cpp-httplib",
         "@nlohmann",
     ] + if_cuda(
         [
             "@cuda//:nvToolsExt",
             "@cutlass",
             ":torch_cuda",
         ],
     ),
     alwayslink = True,
 )

 # Library built from the .cpp files in torch/lib/libshm; links against librt.
 cc_library(
     name = "shm",
     srcs = glob(
         ["torch/lib/libshm/*.cpp"],
     ),
     deps = [":torch"],
     linkopts = ["-lrt"],
 )

 # Public header bundle: every .h/.cuh file in this package plus the generated
 # autograd C++ files.
 cc_library(
     name = "libtorch_headers",
     hdrs = glob(
         [
             "**/*.h",
             "**/*.cuh",
         ],
     ) + [
         # The filegroup is used here because the raw list causes Bazel to
         # see duplicate files; Bazel knows how to deduplicate with the
         # filegroup.
         ":cpp_generated_code",
     ],
     includes = [
         "torch/csrc/api/include",
         "torch/csrc/distributed",
         "torch/lib",
         "torch/lib/libshm",
     ],
     deps = [":torch_headers"],
     visibility = ["//visibility:public"],
 )

 # Python-binding sources of torch: the core list, the CUDA and distributed
 # lists in CUDA builds, and the generated autograd Python bindings.
 cc_library(
     name = "torch_python",
     srcs = (
         libtorch_python_core_sources +
         if_cuda(libtorch_python_cuda_sources) +
         if_cuda(libtorch_python_distributed_sources) +
         GENERATED_AUTOGRAD_PYTHON
     ),
     # The .cpp files in torch/csrc/generic are listed as headers rather than
     # compiled as sources (presumably they are textually #included -- confirm).
     hdrs = glob(["torch/csrc/generic/*.cpp"]),
     copts = COMMON_COPTS + if_cuda(["-DUSE_CUDA=1"]),
     deps = [
         ":torch",
         ":shm",
         "@pybind11",
     ],
 )

 # The `torch._C` extension module: a stub C file linked against the Python
 # bindings and the NVRTC stub library.
 pybind_extension(
     name = "torch/_C",
     deps = [
         ":torch_python",
         ":aten_nvrtc",
     ],
     srcs = ["torch/csrc/stub.c"],
 )

 # Library built from the sources and headers in functorch/csrc/dim.
 cc_library(
     name = "functorch",
     srcs = glob(["functorch/csrc/dim/*.cpp"]),
     hdrs = glob(["functorch/csrc/dim/*.h"]),
     deps = [
         ":aten_nvrtc",
         ":torch_python",
         "@pybind11",
     ],
 )

 # The `functorch._C` extension module. TORCH_EXTENSION_NAME is defined as
 # `_C` when compiling its source.
 pybind_extension(
     name = "functorch/_C",
     srcs = ["functorch/csrc/init_dim_only.cpp"],
     copts = ["-DTORCH_EXTENSION_NAME=_C"],
     deps = [
         ":functorch",
         ":torch_python",
         ":aten_nvrtc",
     ],
 )

 # Binary built from torch/lib/libshm/manager.cpp; shipped as runtime data of
 # the `pytorch_py` library. Linked dynamically (linkstatic = False).
 cc_binary(
     name = "torch/bin/torch_shm_manager",
     srcs = ["torch/lib/libshm/manager.cpp"],
     linkstatic = False,
     deps = [":shm"],
 )

 # Generates torch/version.py from its template. Only the CUDA version differs
 # between the CUDA and the CPU-only substitution sets.
 template_rule(
     name = "gen_version_py",
     out = "torch/version.py",
     src = ":torch/version.py.tpl",
     substitutions = if_cuda(
         # CUDA build
         {
             # Set default to 11.2. Otherwise Torchvision complains about
             # incompatibility.
             "{{CUDA_VERSION}}": "11.2",
             "{{VERSION}}": "2.0.0",
         },
         # CPU-only build
         {
             "{{CUDA_VERSION}}": "None",
             "{{VERSION}}": "2.0.0",
         },
     ),
 )

 # The Python side of PyTorch: torch and functorch .py sources, with the
 # generated torch/version.py substituted for any checked-in copy, and the
 # native extension modules plus the shm manager binary as runtime data.
 py_library(
     name = "pytorch_py",
     srcs = (
         glob(
             ["torch/**/*.py"],
             exclude = ["torch/version.py"],
         ) +
         [":torch/version.py"] +
         glob(["functorch/**/*.py"])
     ),
     data = [
         ":torch/_C.so",
         ":functorch/_C.so",
         ":torch/bin/torch_shm_manager",
     ],
     visibility = ["//visibility:public"],
     deps = [
         rules.requirement("numpy"),
         rules.requirement("pyyaml"),
         rules.requirement("requests"),
         rules.requirement("setuptools"),
         rules.requirement("sympy"),
         rules.requirement("typing_extensions"),
         "//torchgen",
     ],
 )

 # cpp api tests
 # Shared support code for the C++ API tests; every cc_test generated from
 # test/cpp/api depends on it.
 cc_library(
     name = "test_support",
     testonly = True,
     hdrs = [
         "test/cpp/api/init_baseline.h",
         "test/cpp/api/optim_baseline.h",
         "test/cpp/api/support.h",
         "test/cpp/common/support.h",
     ],
     srcs = ["test/cpp/api/support.cpp"],
     deps = [
         ":torch",
         "@com_google_googletest//:gtest_main",
     ],
 )

 # Torch integration tests rely on a labeled data set from the MNIST database.
 # http://yann.lecun.com/exdb/mnist/

 # C++ API test sources that each become their own cc_test. integration.cpp
 # is excluded because it has a dedicated `integration_test` target;
 # imethod.cpp is excluded as well.
 cpp_api_tests = glob(
     ["test/cpp/api/*.cpp"],
     exclude = [
         "test/cpp/api/" + basename
         for basename in [
             "imethod.cpp",
             "integration.cpp",
         ]
     ],
 )

 # C++ API integration test; takes the outputs of `download_mnist` as runtime
 # data and is tagged as requiring a GPU.
 cc_test(
     name = "integration_test",
     size = "medium",
     srcs = ["test/cpp/api/integration.cpp"],
     data = [":download_mnist"],
     tags = ["gpu-required"],
     deps = [
         ":test_support",
         "@com_google_googletest//:gtest_main",
     ],
 )

 # One cc_test per file in `cpp_api_tests`. The target name is the file's
 # basename without extension, with "-" replaced by "_" and "_test" appended.
 [
     cc_test(
         name = paths.split_extension(paths.basename(test_src))[0].replace("-", "_") + "_test",
         size = "medium",
         srcs = [test_src],
         deps = [
             ":test_support",
             "@com_google_googletest//:gtest_main",
         ],
     )
     for test_src in cpp_api_tests
 ]

 # Suite of C++ API tests. The names are listed by hand; apart from
 # `integration_test` they are expected to match the cc_test targets generated
 # from `cpp_api_tests` (presumably one per test/cpp/api/*.cpp file -- keep in
 # sync when files are added).
 test_suite(
     name = "api_tests",
     tests = [
         test_name + "_test"
         for test_name in [
             "any",
             "autograd",
             "dataloader",
             "enum",
             "expanding_array",
             "functional",
             "init",
             "integration",
             "jit",
             "memory",
             "misc",
             "module",
             "modulelist",
             "modules",
             "nn_utils",
             "optim",
             "ordered_dict",
             "rnn",
             "sequential",
             "serialize",
             "static",
             "tensor_options",
             "tensor",
             "torch_include",
         ]
     ],
 )

 # dist autograd tests
 # Tagged "exclusive" and "gpu-required".
 cc_test(
     name = "torch_dist_autograd_test",
     size = "small",
     srcs = ["test/cpp/dist_autograd/test_dist_autograd.cpp"],
     deps = [
         ":torch",
         "@com_google_googletest//:gtest_main",
     ],
     tags = [
         "exclusive",
         "gpu-required",
     ],
 )

 # jit tests
 # Because these individual unit tests require custom registering, it is
 # easier to mimic the cmake build by globbing them together into a single
 # test binary.
 cc_test(
     name = "jit_tests",
     size = "small",
     srcs = glob(
         [
             "test/cpp/jit/*.cpp",
             "test/cpp/jit/*.h",
             "test/cpp/tensorexpr/*.cpp",
             "test/cpp/tensorexpr/*.h",
         ],
         # skip this since <pybind11/embed.h> is not found in OSS build
         exclude = ["test/cpp/jit/test_exception.cpp"],
     ),
     deps = [
         ":torch",
         "@com_google_googletest//:gtest_main",
     ],
     tags = [
         "exclusive",
         "gpu-required",
     ],
     linkstatic = True,
 )

 # Lazy tensor tests, globbed into a single statically linked test binary.
 cc_test(
     name = "lazy_tests",
     size = "small",
     srcs = glob(
         [
             "test/cpp/lazy/*.cpp",
             "test/cpp/lazy/*.h",
         ],
         exclude = [
             # skip these since they depend on generated LazyIr.h which isn't
             # available in bazel yet
             "test/cpp/lazy/test_ir.cpp",
             "test/cpp/lazy/test_lazy_ops.cpp",
             "test/cpp/lazy/test_lazy_ops_util.cpp",
         ],
     ),
     deps = [
         ":torch",
         "@com_google_googletest//:gtest_main",
     ],
     tags = ["exclusive"],
     linkstatic = True,
 )

 # python api tests

 # Runs test/_test_bazel.py against the `pytorch_py` library.
 py_test(
     name = "test_bazel",
     main = "test/_test_bazel.py",
     srcs = ["test/_test_bazel.py"],
     deps = [":pytorch_py"],
 )

 # all tests
 # NOTE(review): `lazy_tests` and `test_bazel` are declared in this file but
 # are not listed in this suite -- confirm whether that is intentional.
 test_suite(
     name = "all_tests",
     tests = [
         # Targets in this package
         "api_tests",
         "jit_tests",
         "torch_dist_autograd_test",
         # Targets in other packages
         "//c10/test:tests",
     ],
 )

 # An internal genrule that we are converging with refers to these files as if
 # they were in this package, so we alias each of them by its basename for
 # compatibility.
 [
     alias(
         name = paths.basename(aliased_path),
         actual = aliased_path,
     )
     for aliased_path in [
         # ATen templates
         "aten/src/ATen/templates/DispatchKeyNativeFunctions.cpp",
         "aten/src/ATen/templates/DispatchKeyNativeFunctions.h",
         "aten/src/ATen/templates/LazyIr.h",
         "aten/src/ATen/templates/LazyNonNativeIr.h",
         "aten/src/ATen/templates/RegisterDispatchKey.cpp",
         "aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
         # ATen native YAML files
         "aten/src/ATen/native/native_functions.yaml",
         "aten/src/ATen/native/tags.yaml",
         "aten/src/ATen/native/ts_native_functions.yaml",
         # torch lazy files
         "torch/csrc/lazy/core/shape_inference.h",
         "torch/csrc/lazy/ts_backend/ts_native_functions.cpp",
     ]
 ]

 # Runs tools/download_mnist.py to produce the four MNIST files under mnist/;
 # `integration_test` consumes them as runtime data.
 genrule(
     name = "download_mnist",
     srcs = ["//:tools/download_mnist.py"],
     outs = [
         "mnist/train-images-idx3-ubyte",
         "mnist/train-labels-idx1-ubyte",
         "mnist/t10k-images-idx3-ubyte",
         "mnist/t10k-labels-idx1-ubyte",
     ],
     # Resolve the script through $(location) instead of a hard-coded
     # workspace-relative path, so the command keeps working wherever Bazel
     # places the declared source (e.g. when this package is built as an
     # external repository).
     cmd = "python3 $(location //:tools/download_mnist.py) -d $(RULEDIR)/mnist",
 )