tools/target_definitions.bzl - platform/external/pytorch - Git at Google

 # @lint-ignore-every BUCKLINT supress the warning for using native
 load("@bazel_skylib//lib:paths.bzl", "paths")
 load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
 load("@fbcode_macros//build_defs:cpp_python_extension.bzl", "cpp_python_extension")
 load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
 load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
 load("@fbsource//tools/build_defs:glob_defs.bzl", "glob")
 load(
     "//caffe2:build_variables.bzl",
     "glob_libtorch_python_sources",
     "libtorch_cuda_sources",
     "libtorch_nvfuser_generated_headers",
     "libtorch_nvfuser_runtime_sources",
     "libtorch_python_cuda_sources",
     "libtorch_sources",
     "torch_cpp_srcs",
 )
 load(
     "//caffe2:defs_hip.bzl",
     "get_hip_flags",
     "hip_external_deps",
     "hip_pp_flags",
 )
 load("//caffe2/caffe2/fb:defs_gpu.bzl", "gpu_library_selector", "gpu_library_targets", "is_amd_build")
 load("//tools/build/buck:nccl_deps.bzl", "get_nccl_dependency")

 def _path_to_filename(fname):
     return paths.split_extension(paths.basename(fname))[0]

 def use_kineto():
     return native.host_info().os.is_linux and native.host_info().arch.is_x86_64 and not is_amd_build()

 def add_torch_libs():
     r = {}

     torch_cpp_headers = glob(["torch/csrc/api/include/**/*.h"]) + ["torch/script.h"]
     libtorch_python_sources = glob_libtorch_python_sources()

     use_mpi = native.read_config("fbcode", "caffe2_use_mpi", None)
     enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))

     compiler_flags_cpu = [
         "-DUSE_C10D",
         "-DUSE_NUMPY",
         "-DUSE_SCALARS",
         "-DNO_CUDNN_DESTROY_HANDLE",
         "-DBUILD_CAFFE2",
         "-DTORCH_ENABLE_LLVM",
         "-Wno-write-strings",
         "-Wno-format",
         "-Wno-strict-aliasing",
         "-Wno-non-virtual-dtor",
         "-Wno-shadow-compatible-local",
         "-Wno-empty-body",
     ] + ([] if native.host_info().os.is_windows else [
         # XNNPACK depends on an updated version of pthreadpool interface, whose implementation
         # includes <pthread.h> - a header not available on Windows.
         "-DUSE_XNNPACK",
     ])

     # We should really include preprocessor flags here
     # instead of compiler_flags
     propagated_pp_flags_cpu = [
         "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
         "-DUSE_DISTRIBUTED",
         "-DUSE_C10D_GLOO",
         "-DUSE_RPC",
         "-DUSE_TENSORPIPE",
     ] + (
         ["-DUSE_C10D_MPI"] if use_mpi else []
     ) + (
         ["-DUSE_KINETO", "-DUSE_KINETO_UPDATED"] if use_kineto() else []
     ) + (
         ["-DENABLE_LIBKINETO_CLIENT"] if native.read_config("kineto", "enable_libkineto_client", "1") == "1" else []
     )

     compiler_flags_cuda = [
         "-DUSE_CUDNN",
         "-DUSE_NCCL",
     ]

     compiler_flags_hip = []

     propagated_pp_flags_cuda = [
         "-DUSE_CUDA",
         "-DUSE_C10D_NCCL",
     ]

     common_headers = glob([
         "torch/csrc/**/*.h",
         # c10d used to be a separate library whose includes ended in .hpp.
         "torch/csrc/distributed/c10d/*.hpp",
         "torch/csrc/generic/*.cpp",
     ]) + [
         "torch/csrc/deploy/Exception.h",
         "torch/csrc/deploy/deploy.h",
         "torch/csrc/deploy/elf_file.h",
         "torch/csrc/deploy/environment.h",
         "torch/csrc/deploy/interpreter/builtin_registry.h",
         "torch/csrc/deploy/interpreter/interpreter_impl.h",
         "torch/csrc/deploy/loader.h",
         "torch/csrc/deploy/mem_file.h",
         "torch/csrc/deploy/noop_environment.h",
         "torch/csrc/deploy/path_environment.h",
         "torch/csrc/deploy/unity/tests/test_unity.h",
         "torch/csrc/deploy/unity/xar_environment.h",
         "torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h",
         "test/cpp/jit/test_custom_class_registrations.h",
         "test/cpp/jit/test_utils.h",
         "test/cpp/tensorexpr/gtest_assert_float_eq.h",
         "test/cpp/tensorexpr/padded_buffer.h",
         "test/cpp/tensorexpr/test_base.h",
         "test/cpp/tensorexpr/test_utils.h",
     ]
     common_headers.remove("torch/csrc/jit/serialization/mobile_bytecode_generated.h")

     common_flags = {
         "compiler_specific_flags": {
             "clang": [
                 "-Wno-absolute-value",
                 "-Wno-expansion-to-defined",
                 "-Wno-pessimizing-move",
                 "-Wno-return-type-c-linkage",
                 "-Wno-unknown-pragmas",
             ],
         },
         "headers": common_headers,
     }

     include_directories = [
         "..",
         ".",
         "torch/csrc/api/include",
         "torch/csrc",
         # c10d used to be a separate library and its includes were c10d/Foo.hpp,
         # hence we now need this hack to keep supporting them.
         "torch/csrc/distributed",
         "torch/csrc/nn",
     ]

     _libtorch_sources = list(libtorch_sources())

     # Add the Gloo and TensorPipe backends specific to Facebook networking.
     _libtorch_sources.append("torch/csrc/distributed/c10d/fb/GlooDeviceFactory.cpp")
     _libtorch_sources.append("torch/csrc/distributed/rpc/fb/tensorpipe_agent.cpp")

     cpp_library(
         name = "libtorch",
         srcs = _libtorch_sources + ([
             "torch/csrc/jit/serialization/flatbuffer_serializer.cpp",
             "torch/csrc/jit/serialization/flatbuffer_serializer_jit.cpp",
             "torch/csrc/jit/mobile/flatbuffer_loader.cpp",
         ] if enable_flatbuffer else []),
         link_whole = True,
         include_directories = include_directories,
         propagated_pp_flags = propagated_pp_flags_cpu + (["-DENABLE_FLATBUFFER"] if enable_flatbuffer else []),
         exported_deps = (
             [
                 ":ATen-cpu",
                 ":generated-autograd-headers",
                 ":generated-lazy-headers",
                 "//caffe2:version_cpp",
                 "//caffe2/caffe2:caffe2_cpu",
                 "//caffe2/caffe2/quantization/server:dnnlowp_ops",
                 "//caffe2/caffe2/serialize:inline_container",
                 "//caffe2/torch/lib/libshm:libshm",
                 "//gloo:gloo",
                 "//gloo/fb/transport/tls:tls",
                 "//gloo/transport/tcp:tcp",
                 "//tensorpipe:tensorpipe_cpu",
             ] + (["//kineto/libkineto:kineto"] if use_kineto() else []) +
             (["//caffe2:mobile_bytecode"] if enable_flatbuffer else [])
         ),
         exported_external_deps = [
             ("nanopb", None, "protobuf-nanopb"),
             ("protobuf", None),
             ("llvm-fb", None, "LLVMAnalysis"),
             ("llvm-fb", None, "LLVMBPFAsmParser"),
             ("llvm-fb", None, "LLVMBPFCodeGen"),
             ("llvm-fb", None, "LLVMCodeGen"),
             ("llvm-fb", None, "LLVMCore"),
             ("llvm-fb", None, "LLVMExecutionEngine"),
             ("llvm-fb", None, "LLVMIRReader"),
             ("llvm-fb", None, "LLVMInstCombine"),
             ("llvm-fb", None, "LLVMInterpreter"),
             ("llvm-fb", None, "LLVMMC"),
             ("llvm-fb", None, "LLVMNVPTXCodeGen"),
             ("llvm-fb", None, "LLVMOrcJIT"),
             ("llvm-fb", None, "LLVMRISCVAsmParser"),
             ("llvm-fb", None, "LLVMRISCVCodeGen"),
             ("llvm-fb", None, "LLVMScalarOpts"),
             ("llvm-fb", None, "LLVMSupport"),
             ("llvm-fb", None, "LLVMTarget"),
             ("llvm-fb", None, "LLVMTransformUtils"),
             ("llvm-fb", None, "LLVMVectorize"),
             ("llvm-fb", None, "LLVMWebAssemblyAsmParser"),
             ("llvm-fb", None, "LLVMWebAssemblyCodeGen"),
             ("llvm-fb", None, "LLVMWebAssemblyInfo"),
             ("llvm-fb", None, "LLVMX86AsmParser"),
             ("llvm-fb", None, "LLVMX86CodeGen"),
             ("llvm-fb", None, "LLVMipo"),
         ] + ([("openmpi", None, "openmpi")] if use_mpi else []),
         compiler_flags = compiler_flags_cpu,
         **common_flags
     )

     # Below rules are used to stringify NVfuser runtime library into a header files
     python_binary(
         name = "nvfuser-stringify",
         srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
         base_module = "",
         main_module = "torch.csrc.jit.codegen.cuda.tools.stringify_file",
     )

     # files in libtorch_nvfuser_runtime_sources that are violating package boundaries
     # are mapped to their corresponding export_file rules.
     violation_paths_to_rule = {
         "aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh": ":aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh",
         "aten/src/ATen/cuda/detail/UnpackRaw.cuh": ":aten/src/ATen/cuda/detail/UnpackRaw.cuh",
     }

     for name in libtorch_nvfuser_runtime_sources:
         src_path = violation_paths_to_rule.get(name, name)
         filename = _path_to_filename(src_path)
         native.genrule(
             name = "gen-nvfuser-hdr={}.h".format(filename),
             srcs = {name: src_path},
             bash = "$(exe :nvfuser-stringify) -i $SRCDIR/{} -o $OUT".format(name),
             out = "{}.h".format(filename),
         )
     cpp_library(
         name = "generated-nvfuser-headers",
         headers = [":gen-nvfuser-hdr=" + x for x in libtorch_nvfuser_generated_headers],
         header_namespace = "nvfuser_resources",
     )

     _libtorch_cuda_sources = list(libtorch_cuda_sources)
     cpp_library(
         name = "libtorch_cuda",
         srcs = _libtorch_cuda_sources,
         link_whole = True,
         include_directories = include_directories,
         # TODO: putting USE_CUDA in propagated_pp_flags is error-prone
         propagated_pp_flags = propagated_pp_flags_cuda,
         exported_deps = [
             ":ATen",
             ":generated-aten-headers-cuda",
             ":generated-autograd-headers",
             ":generated-nvfuser-headers",
             ":libtorch",
             "//caffe2/caffe2:caffe2_cpu",
             "//caffe2/caffe2:caffe2_gpu",
             "//caffe2/torch/lib/libshm:libshm",
             "//gloo:gloo_gpu_cuda",
             "//tensorpipe:tensorpipe_cuda",
         ],
         exported_external_deps = [
             ("cudnn", None, "cudnn-lazy"),
             ("cuda", None, "nvToolsExt-lazy"),
             ("cuda", None, "nvrtc-lazy"),
             ("cuda", None, "nvrtc-builtins-lazy"),
         ] + get_nccl_dependency(),
         compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
         **common_flags
     )

     # (original_paths, hipified_paths)
     libtorch_hip_headers_filter = torch_cpp_headers + [h for h in common_headers if any([h.startswith(d) for d in [
         # headers in the following directories are added to libtorch_hip_headers_filter
         # so that they are not hipified.
         "torch/csrc/deploy/",
         "torch/csrc/distributed/rpc/metrics/",
         "torch/csrc/jit/serialization/",
         "torch/cpp/jit/",
         "torch/cpp/tensorexpr/",
     ]])]
     libtorch_hip_sources = (libtorch_cuda_sources, [f.replace(".cu", ".hip") for f in libtorch_cuda_sources])
     libtorch_hip_headers = ([f for f in common_headers if f not in libtorch_hip_headers_filter],) * 2

     custom_rule(
         name = "fb_libtorch_hipify_gen",
         srcs = libtorch_hip_sources[0] + libtorch_hip_headers[0],
         build_args = "--source-dir= --hipify-dir= --copy-dir= --rewrite-cu-ext",
         build_script_dep = "//caffe2:fb_caffe2_hipify",
         output_gen_files = libtorch_hip_sources[1] + libtorch_hip_headers[1],
     )

     cpp_library(
         name = "libtorch_hip_headers",
         headers = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_headers[1]],
         header_namespace = "",
     )

     cpp_library(
         name = "libtorch_hip",
         srcs = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_sources[1]],
         headers = [f for f in common_headers if f in libtorch_hip_headers_filter],
         link_whole = True,
         propagated_pp_flags = hip_pp_flags,
         exported_deps = [
             ":generated-aten-headers-hip",
             ":generated-autograd-headers",
             ":generated-nvfuser-headers",
             ":libtorch",
             ":libtorch_hip_headers",
             "//caffe2:ATen-hip",
             "//caffe2/caffe2:caffe2_cpu",
             "//caffe2/caffe2:caffe2_gpu_hip",
             "//caffe2/torch/lib/libshm:libshm",
             "//gloo:gloo_gpu_hip",
             "//tensorpipe:tensorpipe_cpu",  # TODO: include a HIP version once it's developed
         ],
         exported_external_deps = hip_external_deps,
         compiler_flags = compiler_flags_cpu + compiler_flags_hip + [
             "-Wno-unused-result",
         ],
         hip_flags = ["-Wno-unused-result"] + get_hip_flags(),
         compiler_specific_flags = common_flags["compiler_specific_flags"],
     )

     gpu_library_targets(
         name = "libtorch_gpu",
         deps_cpu = [
             ":libtorch",
         ],
         deps_cuda = [
             ":libtorch_cuda",
         ],
         deps_hip = [
             ":libtorch_hip",
         ],
         exclude_hip_target = False,
         extra_external_deps = [],
     )

     # torch-cpp is still conditionally compiled based on USE_CUDA. Ideally we'd
     # separate it out as an additive library instead.
     gpu_library_selector(
         name = "torch-cpp",
         deps_cpu = [":torch-cpp-cpu"],
         deps_cuda = [":torch-cpp-cuda"],
         deps_hip = [":torch-cpp-hip"],
         merge_cpu_deps = False,
         exclude_hip_target = False,
     )

     # USE_CUDA flag is propagated through propagated_pp_flags on libtorch
     cpp_library(
         name = "torch-cpp-cuda",
         srcs = torch_cpp_srcs,
         headers = torch_cpp_headers,
         include_directories = [
             ".",
             "torch/csrc/api/include/",
         ],
         exported_deps = [
             ":libtorch_cuda",
             "//caffe2/torch/fb/init:init",
         ],
         exported_external_deps = [
             ("cuda", None, "cuda-lazy"),
             ("cudnn", None, "cudnn-lazy"),
         ],
     )

     cpp_library(
         name = "torch-cpp-hip",
         srcs = torch_cpp_srcs,
         headers = torch_cpp_headers,
         include_directories = [
             ".",
             "torch/csrc/api/include/",
         ],
         exported_deps = [
             ":libtorch_hip",
             "//caffe2/torch/fb/init:init",
         ],
         exported_external_deps = hip_external_deps,
     )

     cpp_library(
         name = "torch-cpp-cpu",
         srcs = torch_cpp_srcs,
         headers = torch_cpp_headers,
         include_directories = [
             ".",
             "torch/csrc/api/include/",
         ],
         exported_deps = [
             ":libtorch",
             "//caffe2/torch/fb/init:init",
         ],
     )

     # _C_impl is still conditionally compiled based on USE_CUDA. Ideally we'd
     # separate it out as an additive library instead.
     # TODO: split it into cpp and cuda parts similarly to libtorch
     gpu_library_selector(
         name = "_C_impl",
         deps_cpu = [":_C_impl_cpu"],
         deps_cuda = [":_C_impl_cuda"],
         deps_hip = [":_C_impl_hip"],
         merge_cpu_deps = False,
         exclude_hip_target = False,
     )

     cpp_library(
         name = "_C_impl_cpu",
         srcs = libtorch_python_sources,
         link_whole = True,
         exported_deps = [
             "fbsource//third-party/fmt:fmt",
             ":torch-cpp-cpu",
             "//caffe2/torch/fb/init:init",
             "//caffe2/torch/lib/libshm:libshm",
         ],
         exported_external_deps = [
             ("numpy", None, "cpp"),
             ("pybind11", None),
             ("python", None),
         ],
         compiler_flags = compiler_flags_cpu,
         compiler_specific_flags = common_flags["compiler_specific_flags"],
     )

     # This target is used to help get headers for compile-time deps for torch::deploy
     # libinterpreter.so build _without_ getting link-time deps, which are supplied
     # separately by the application that dlopens libinterpreter.so.
     #
     # We make use of the buck auto-generated #headers flavor of a target to accomplish this.
     #
     # However, since #headers flavor of target with srcs can't be used in all build modes, we
     # work around this limitation by using this 'pass-through' target, which has a usable
     # #headers flavor in all build modes.
     cpp_library(
         name = "headers_for_torch_python_deps",
         exported_deps = [
             ":_C_impl_cpu",
         ],
     )
     cpp_library(
         name = "headers_for_torch_python_cuda_deps",
         exported_deps = [
             ":_C_impl_cuda",
         ],
     )

     # This target compiles torch_python bindings, but skips the deps on actual
     # torch and python since those will be integrated specially in the wrapper for
     # libinterpreter.so used in torch::deploy
     cpp_library(
         name = "torch_python_without_torch",
         srcs = libtorch_python_sources + torch_cpp_srcs,
         undefined_symbols = True,
         preferred_linkage = "static",
         exported_deps = [
             ":headers_for_torch_python_deps#headers",
         ],
         exported_external_deps = [
             ("pybind11", None),
             ("frozenpython", None, "python-headers"),
         ],
         compiler_flags = compiler_flags_cpu + [
             # some code in the Python bindings compiles differently
             # when you are deploy
             "-DUSE_DEPLOY",
         ],
         compiler_specific_flags = common_flags["compiler_specific_flags"],
     )

     cpp_library(
         name = "torch_python_cuda_without_torch",
         srcs = libtorch_python_sources + torch_cpp_srcs + libtorch_python_cuda_sources,
         undefined_symbols = True,
         preferred_linkage = "static",
         exported_deps = [
             ":headers_for_torch_python_cuda_deps#headers",
         ],
         exported_external_deps = [
             ("pybind11", None),
             ("frozenpython", None, "python-headers"),
         ],
         compiler_flags = compiler_flags_cpu + [
             "-DUSE_CUDA",
             # some code in the Python bindings compiles differently
             # when you are deploy
             "-DUSE_DEPLOY",
         ],
         compiler_specific_flags = common_flags["compiler_specific_flags"],
     )

     cpp_library(
         name = "_C_impl_cuda",
         srcs = libtorch_python_sources + libtorch_python_cuda_sources,
         link_whole = True,
         exported_deps = [
             "fbsource//third-party/fmt:fmt",
             ":torch-cpp-cuda",
             "//caffe2/torch/fb/init:init",
             "//caffe2/torch/lib/libshm:libshm",
         ],
         exported_external_deps = [
             ("numpy", None, "cpp"),
             ("pybind11", None),
             ("python", None),
         ],
         compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
         compiler_specific_flags = common_flags["compiler_specific_flags"],
     )

     # Autogenerated files whose rules contain ":" are not hipified.
     libtorch_python_hip_sources = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" in f]
     libtorch_python_hip_sources_hipified = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if not ":" in f]

     custom_rule(
         name = "fb_C_impl_hipify_gen",
         srcs = libtorch_python_hip_sources_hipified,
         build_args = "--source-dir= --hipify-dir= --copy-dir=",
         build_script_dep = "//caffe2:fb_caffe2_hipify",
         output_gen_files = libtorch_python_hip_sources_hipified,
     )

     cpp_library(
         name = "_C_impl_hip",
         srcs = [":fb_C_impl_hipify_gen={}".format(f) for f in (libtorch_python_hip_sources_hipified)] + libtorch_python_hip_sources,
         link_whole = True,
         exported_deps = [
             "fbsource//third-party/fmt:fmt",
             ":torch-cpp-hip",
             "//caffe2/torch/fb/init:init",
             "//caffe2/torch/lib/libshm:libshm",
         ],
         exported_external_deps = [
             ("numpy", None, "cpp"),
             ("pybind11", None),
             ("python", None),
         ],
         compiler_flags = compiler_flags_cpu + compiler_flags_hip + ["-Wno-unused-result"],
         compiler_specific_flags = common_flags["compiler_specific_flags"],
     )

     cpp_python_extension(
         name = "_C",
         srcs = [
             "torch/csrc/stub.c",
         ],
         base_module = "torch",
         deps = [
             ":_C_impl",
             "//caffe2:flatbuffer_loader",
         ],
     )

     cpp_python_extension(
         name = "_C_flatbuffer",
         srcs = [
             "torch/csrc/stub_with_flatbuffer.c",
             "torch/csrc/init_flatbuffer_module.cpp",
         ],
         base_module = "torch",
         deps = [
             ":_C_impl",
             "//caffe2:flatbuffer_loader",
             "//caffe2:flatbuffer_serializer",
         ],
     )

     return r
	# @lint-ignore-every BUCKLINT supress the warning for using native
	load("@bazel_skylib//lib:paths.bzl", "paths")
	load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
	load("@fbcode_macros//build_defs:cpp_python_extension.bzl", "cpp_python_extension")
	load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
	load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
	load("@fbsource//tools/build_defs:glob_defs.bzl", "glob")
	load(
	"//caffe2:build_variables.bzl",
	"glob_libtorch_python_sources",
	"libtorch_cuda_sources",
	"libtorch_nvfuser_generated_headers",
	"libtorch_nvfuser_runtime_sources",
	"libtorch_python_cuda_sources",
	"libtorch_sources",
	"torch_cpp_srcs",
	)
	load(
	"//caffe2:defs_hip.bzl",
	"get_hip_flags",
	"hip_external_deps",
	"hip_pp_flags",
	)
	load("//caffe2/caffe2/fb:defs_gpu.bzl", "gpu_library_selector", "gpu_library_targets", "is_amd_build")
	load("//tools/build/buck:nccl_deps.bzl", "get_nccl_dependency")

	def _path_to_filename(fname):
	return paths.split_extension(paths.basename(fname))[0]

	def use_kineto():
	return native.host_info().os.is_linux and native.host_info().arch.is_x86_64 and not is_amd_build()

	def add_torch_libs():
	r = {}

	torch_cpp_headers = glob(["torch/csrc/api/include/*/.h"]) + ["torch/script.h"]
	libtorch_python_sources = glob_libtorch_python_sources()

	use_mpi = native.read_config("fbcode", "caffe2_use_mpi", None)
	enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))

	compiler_flags_cpu = [
	"-DUSE_C10D",
	"-DUSE_NUMPY",
	"-DUSE_SCALARS",
	"-DNO_CUDNN_DESTROY_HANDLE",
	"-DBUILD_CAFFE2",
	"-DTORCH_ENABLE_LLVM",
	"-Wno-write-strings",
	"-Wno-format",
	"-Wno-strict-aliasing",
	"-Wno-non-virtual-dtor",
	"-Wno-shadow-compatible-local",
	"-Wno-empty-body",
	] + ([] if native.host_info().os.is_windows else [
	# XNNPACK depends on an updated version of pthreadpool interface, whose implementation
	# includes <pthread.h> - a header not available on Windows.
	"-DUSE_XNNPACK",
	])

	# We should really include preprocessor flags here
	# instead of compiler_flags
	propagated_pp_flags_cpu = [
	"-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
	"-DUSE_DISTRIBUTED",
	"-DUSE_C10D_GLOO",
	"-DUSE_RPC",
	"-DUSE_TENSORPIPE",
	] + (
	["-DUSE_C10D_MPI"] if use_mpi else []
	) + (
	["-DUSE_KINETO", "-DUSE_KINETO_UPDATED"] if use_kineto() else []
	) + (
	["-DENABLE_LIBKINETO_CLIENT"] if native.read_config("kineto", "enable_libkineto_client", "1") == "1" else []
	)

	compiler_flags_cuda = [
	"-DUSE_CUDNN",
	"-DUSE_NCCL",
	]

	compiler_flags_hip = []

	propagated_pp_flags_cuda = [
	"-DUSE_CUDA",
	"-DUSE_C10D_NCCL",
	]

	common_headers = glob([
	"torch/csrc/*/.h",
	# c10d used to be a separate library whose includes ended in .hpp.
	"torch/csrc/distributed/c10d/*.hpp",
	"torch/csrc/generic/*.cpp",
	]) + [
	"torch/csrc/deploy/Exception.h",
	"torch/csrc/deploy/deploy.h",
	"torch/csrc/deploy/elf_file.h",
	"torch/csrc/deploy/environment.h",
	"torch/csrc/deploy/interpreter/builtin_registry.h",
	"torch/csrc/deploy/interpreter/interpreter_impl.h",
	"torch/csrc/deploy/loader.h",
	"torch/csrc/deploy/mem_file.h",
	"torch/csrc/deploy/noop_environment.h",
	"torch/csrc/deploy/path_environment.h",
	"torch/csrc/deploy/unity/tests/test_unity.h",
	"torch/csrc/deploy/unity/xar_environment.h",
	"torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h",
	"test/cpp/jit/test_custom_class_registrations.h",
	"test/cpp/jit/test_utils.h",
	"test/cpp/tensorexpr/gtest_assert_float_eq.h",
	"test/cpp/tensorexpr/padded_buffer.h",
	"test/cpp/tensorexpr/test_base.h",
	"test/cpp/tensorexpr/test_utils.h",
	]
	common_headers.remove("torch/csrc/jit/serialization/mobile_bytecode_generated.h")

	common_flags = {
	"compiler_specific_flags": {
	"clang": [
	"-Wno-absolute-value",
	"-Wno-expansion-to-defined",
	"-Wno-pessimizing-move",
	"-Wno-return-type-c-linkage",
	"-Wno-unknown-pragmas",
	],
	},
	"headers": common_headers,
	}

	include_directories = [
	"..",
	".",
	"torch/csrc/api/include",
	"torch/csrc",
	# c10d used to be a separate library and its includes were c10d/Foo.hpp,
	# hence we now need this hack to keep supporting them.
	"torch/csrc/distributed",
	"torch/csrc/nn",
	]

	_libtorch_sources = list(libtorch_sources())

	# Add the Gloo and TensorPipe backends specific to Facebook networking.
	_libtorch_sources.append("torch/csrc/distributed/c10d/fb/GlooDeviceFactory.cpp")
	_libtorch_sources.append("torch/csrc/distributed/rpc/fb/tensorpipe_agent.cpp")

	cpp_library(
	name = "libtorch",
	srcs = _libtorch_sources + ([
	"torch/csrc/jit/serialization/flatbuffer_serializer.cpp",
	"torch/csrc/jit/serialization/flatbuffer_serializer_jit.cpp",
	"torch/csrc/jit/mobile/flatbuffer_loader.cpp",
	] if enable_flatbuffer else []),
	link_whole = True,
	include_directories = include_directories,
	propagated_pp_flags = propagated_pp_flags_cpu + (["-DENABLE_FLATBUFFER"] if enable_flatbuffer else []),
	exported_deps = (
	[
	":ATen-cpu",
	":generated-autograd-headers",
	":generated-lazy-headers",
	"//caffe2:version_cpp",
	"//caffe2/caffe2:caffe2_cpu",
	"//caffe2/caffe2/quantization/server:dnnlowp_ops",
	"//caffe2/caffe2/serialize:inline_container",
	"//caffe2/torch/lib/libshm:libshm",
	"//gloo:gloo",
	"//gloo/fb/transport/tls:tls",
	"//gloo/transport/tcp:tcp",
	"//tensorpipe:tensorpipe_cpu",
	] + (["//kineto/libkineto:kineto"] if use_kineto() else []) +
	(["//caffe2:mobile_bytecode"] if enable_flatbuffer else [])
	),
	exported_external_deps = [
	("nanopb", None, "protobuf-nanopb"),
	("protobuf", None),
	("llvm-fb", None, "LLVMAnalysis"),
	("llvm-fb", None, "LLVMBPFAsmParser"),
	("llvm-fb", None, "LLVMBPFCodeGen"),
	("llvm-fb", None, "LLVMCodeGen"),
	("llvm-fb", None, "LLVMCore"),
	("llvm-fb", None, "LLVMExecutionEngine"),
	("llvm-fb", None, "LLVMIRReader"),
	("llvm-fb", None, "LLVMInstCombine"),
	("llvm-fb", None, "LLVMInterpreter"),
	("llvm-fb", None, "LLVMMC"),
	("llvm-fb", None, "LLVMNVPTXCodeGen"),
	("llvm-fb", None, "LLVMOrcJIT"),
	("llvm-fb", None, "LLVMRISCVAsmParser"),
	("llvm-fb", None, "LLVMRISCVCodeGen"),
	("llvm-fb", None, "LLVMScalarOpts"),
	("llvm-fb", None, "LLVMSupport"),
	("llvm-fb", None, "LLVMTarget"),
	("llvm-fb", None, "LLVMTransformUtils"),
	("llvm-fb", None, "LLVMVectorize"),
	("llvm-fb", None, "LLVMWebAssemblyAsmParser"),
	("llvm-fb", None, "LLVMWebAssemblyCodeGen"),
	("llvm-fb", None, "LLVMWebAssemblyInfo"),
	("llvm-fb", None, "LLVMX86AsmParser"),
	("llvm-fb", None, "LLVMX86CodeGen"),
	("llvm-fb", None, "LLVMipo"),
	] + ([("openmpi", None, "openmpi")] if use_mpi else []),
	compiler_flags = compiler_flags_cpu,
	**common_flags
	)

	# Below rules are used to stringify NVfuser runtime library into a header files
	python_binary(
	name = "nvfuser-stringify",
	srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
	base_module = "",
	main_module = "torch.csrc.jit.codegen.cuda.tools.stringify_file",
	)

	# files in libtorch_nvfuser_runtime_sources that are violating package boundaries
	# are mapped to their corresponding export_file rules.
	violation_paths_to_rule = {
	"aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh": ":aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh",
	"aten/src/ATen/cuda/detail/UnpackRaw.cuh": ":aten/src/ATen/cuda/detail/UnpackRaw.cuh",
	}

	for name in libtorch_nvfuser_runtime_sources:
	src_path = violation_paths_to_rule.get(name, name)
	filename = _path_to_filename(src_path)
	native.genrule(
	name = "gen-nvfuser-hdr={}.h".format(filename),
	srcs = {name: src_path},
	bash = "$(exe :nvfuser-stringify) -i $SRCDIR/{} -o $OUT".format(name),
	out = "{}.h".format(filename),
	)
	cpp_library(
	name = "generated-nvfuser-headers",
	headers = [":gen-nvfuser-hdr=" + x for x in libtorch_nvfuser_generated_headers],
	header_namespace = "nvfuser_resources",
	)

	_libtorch_cuda_sources = list(libtorch_cuda_sources)
	cpp_library(
	name = "libtorch_cuda",
	srcs = _libtorch_cuda_sources,
	link_whole = True,
	include_directories = include_directories,
	# TODO: putting USE_CUDA in propagated_pp_flags is error-prone
	propagated_pp_flags = propagated_pp_flags_cuda,
	exported_deps = [
	":ATen",
	":generated-aten-headers-cuda",
	":generated-autograd-headers",
	":generated-nvfuser-headers",
	":libtorch",
	"//caffe2/caffe2:caffe2_cpu",
	"//caffe2/caffe2:caffe2_gpu",
	"//caffe2/torch/lib/libshm:libshm",
	"//gloo:gloo_gpu_cuda",
	"//tensorpipe:tensorpipe_cuda",
	],
	exported_external_deps = [
	("cudnn", None, "cudnn-lazy"),
	("cuda", None, "nvToolsExt-lazy"),
	("cuda", None, "nvrtc-lazy"),
	("cuda", None, "nvrtc-builtins-lazy"),
	] + get_nccl_dependency(),
	compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
	**common_flags
	)

	# (original_paths, hipified_paths)
	libtorch_hip_headers_filter = torch_cpp_headers + [h for h in common_headers if any([h.startswith(d) for d in [
	# headers in the following directories are added to libtorch_hip_headers_filter
	# so that they are not hipified.
	"torch/csrc/deploy/",
	"torch/csrc/distributed/rpc/metrics/",
	"torch/csrc/jit/serialization/",
	"torch/cpp/jit/",
	"torch/cpp/tensorexpr/",
	]])]
	libtorch_hip_sources = (libtorch_cuda_sources, [f.replace(".cu", ".hip") for f in libtorch_cuda_sources])
	libtorch_hip_headers = ([f for f in common_headers if f not in libtorch_hip_headers_filter],) * 2

	custom_rule(
	name = "fb_libtorch_hipify_gen",
	srcs = libtorch_hip_sources[0] + libtorch_hip_headers[0],
	build_args = "--source-dir= --hipify-dir= --copy-dir= --rewrite-cu-ext",
	build_script_dep = "//caffe2:fb_caffe2_hipify",
	output_gen_files = libtorch_hip_sources[1] + libtorch_hip_headers[1],
	)

	cpp_library(
	name = "libtorch_hip_headers",
	headers = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_headers[1]],
	header_namespace = "",
	)

	cpp_library(
	name = "libtorch_hip",
	srcs = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_sources[1]],
	headers = [f for f in common_headers if f in libtorch_hip_headers_filter],
	link_whole = True,
	propagated_pp_flags = hip_pp_flags,
	exported_deps = [
	":generated-aten-headers-hip",
	":generated-autograd-headers",
	":generated-nvfuser-headers",
	":libtorch",
	":libtorch_hip_headers",
	"//caffe2:ATen-hip",
	"//caffe2/caffe2:caffe2_cpu",
	"//caffe2/caffe2:caffe2_gpu_hip",
	"//caffe2/torch/lib/libshm:libshm",
	"//gloo:gloo_gpu_hip",
	"//tensorpipe:tensorpipe_cpu", # TODO: include a HIP version once it's developed
	],
	exported_external_deps = hip_external_deps,
	compiler_flags = compiler_flags_cpu + compiler_flags_hip + [
	"-Wno-unused-result",
	],
	hip_flags = ["-Wno-unused-result"] + get_hip_flags(),
	compiler_specific_flags = common_flags["compiler_specific_flags"],
	)

	gpu_library_targets(
	name = "libtorch_gpu",
	deps_cpu = [
	":libtorch",
	],
	deps_cuda = [
	":libtorch_cuda",
	],
	deps_hip = [
	":libtorch_hip",
	],
	exclude_hip_target = False,
	extra_external_deps = [],
	)

	# torch-cpp is still conditionally compiled based on USE_CUDA. Ideally we'd
	# separate it out as an additive library instead.
	gpu_library_selector(
	name = "torch-cpp",
	deps_cpu = [":torch-cpp-cpu"],
	deps_cuda = [":torch-cpp-cuda"],
	deps_hip = [":torch-cpp-hip"],
	merge_cpu_deps = False,
	exclude_hip_target = False,
	)

	# USE_CUDA flag is propagated through propagated_pp_flags on libtorch
	cpp_library(
	name = "torch-cpp-cuda",
	srcs = torch_cpp_srcs,
	headers = torch_cpp_headers,
	include_directories = [
	".",
	"torch/csrc/api/include/",
	],
	exported_deps = [
	":libtorch_cuda",
	"//caffe2/torch/fb/init:init",
	],
	exported_external_deps = [
	("cuda", None, "cuda-lazy"),
	("cudnn", None, "cudnn-lazy"),
	],
	)

	cpp_library(
	name = "torch-cpp-hip",
	srcs = torch_cpp_srcs,
	headers = torch_cpp_headers,
	include_directories = [
	".",
	"torch/csrc/api/include/",
	],
	exported_deps = [
	":libtorch_hip",
	"//caffe2/torch/fb/init:init",
	],
	exported_external_deps = hip_external_deps,
	)

	cpp_library(
	name = "torch-cpp-cpu",
	srcs = torch_cpp_srcs,
	headers = torch_cpp_headers,
	include_directories = [
	".",
	"torch/csrc/api/include/",
	],
	exported_deps = [
	":libtorch",
	"//caffe2/torch/fb/init:init",
	],
	)

	# _C_impl is still conditionally compiled based on USE_CUDA. Ideally we'd
	# separate it out as an additive library instead.
	# TODO: split it into cpp and cuda parts similarly to libtorch
	gpu_library_selector(
	name = "_C_impl",
	deps_cpu = [":_C_impl_cpu"],
	deps_cuda = [":_C_impl_cuda"],
	deps_hip = [":_C_impl_hip"],
	merge_cpu_deps = False,
	exclude_hip_target = False,
	)

	cpp_library(
	name = "_C_impl_cpu",
	srcs = libtorch_python_sources,
	link_whole = True,
	exported_deps = [
	"fbsource//third-party/fmt:fmt",
	":torch-cpp-cpu",
	"//caffe2/torch/fb/init:init",
	"//caffe2/torch/lib/libshm:libshm",
	],
	exported_external_deps = [
	("numpy", None, "cpp"),
	("pybind11", None),
	("python", None),
	],
	compiler_flags = compiler_flags_cpu,
	compiler_specific_flags = common_flags["compiler_specific_flags"],
	)

	# This target is used to help get headers for compile-time deps for torch::deploy
	# libinterpreter.so build _without_ getting link-time deps, which are supplied
	# separately by the application that dlopens libinterpreter.so.
	#
	# We make use of the buck auto-generated #headers flavor of a target to accomplish this.
	#
	# However, since #headers flavor of target with srcs can't be used in all build modes, we
	# work around this limitation by using this 'pass-through' target, which has a usable
	# #headers flavor in all build modes.
	cpp_library(
	name = "headers_for_torch_python_deps",
	exported_deps = [
	":_C_impl_cpu",
	],
	)
	cpp_library(
	name = "headers_for_torch_python_cuda_deps",
	exported_deps = [
	":_C_impl_cuda",
	],
	)

	# This target compiles torch_python bindings, but skips the deps on actual
	# torch and python since those will be integrated specially in the wrapper for
	# libinterpreter.so used in torch::deploy
	cpp_library(
	name = "torch_python_without_torch",
	srcs = libtorch_python_sources + torch_cpp_srcs,
	undefined_symbols = True,
	preferred_linkage = "static",
	exported_deps = [
	":headers_for_torch_python_deps#headers",
	],
	exported_external_deps = [
	("pybind11", None),
	("frozenpython", None, "python-headers"),
	],
	compiler_flags = compiler_flags_cpu + [
	# some code in the Python bindings compiles differently
	# when you are deploy
	"-DUSE_DEPLOY",
	],
	compiler_specific_flags = common_flags["compiler_specific_flags"],
	)

	cpp_library(
	name = "torch_python_cuda_without_torch",
	srcs = libtorch_python_sources + torch_cpp_srcs + libtorch_python_cuda_sources,
	undefined_symbols = True,
	preferred_linkage = "static",
	exported_deps = [
	":headers_for_torch_python_cuda_deps#headers",
	],
	exported_external_deps = [
	("pybind11", None),
	("frozenpython", None, "python-headers"),
	],
	compiler_flags = compiler_flags_cpu + [
	"-DUSE_CUDA",
	# some code in the Python bindings compiles differently
	# when you are deploy
	"-DUSE_DEPLOY",
	],
	compiler_specific_flags = common_flags["compiler_specific_flags"],
	)

	cpp_library(
	name = "_C_impl_cuda",
	srcs = libtorch_python_sources + libtorch_python_cuda_sources,
	link_whole = True,
	exported_deps = [
	"fbsource//third-party/fmt:fmt",
	":torch-cpp-cuda",
	"//caffe2/torch/fb/init:init",
	"//caffe2/torch/lib/libshm:libshm",
	],
	exported_external_deps = [
	("numpy", None, "cpp"),
	("pybind11", None),
	("python", None),
	],
	compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
	compiler_specific_flags = common_flags["compiler_specific_flags"],
	)

	# Autogenerated files whose rules contain ":" are not hipified.
	libtorch_python_hip_sources = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" in f]
	libtorch_python_hip_sources_hipified = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if not ":" in f]

	custom_rule(
	name = "fb_C_impl_hipify_gen",
	srcs = libtorch_python_hip_sources_hipified,
	build_args = "--source-dir= --hipify-dir= --copy-dir=",
	build_script_dep = "//caffe2:fb_caffe2_hipify",
	output_gen_files = libtorch_python_hip_sources_hipified,
	)

	cpp_library(
	name = "_C_impl_hip",
	srcs = [":fb_C_impl_hipify_gen={}".format(f) for f in (libtorch_python_hip_sources_hipified)] + libtorch_python_hip_sources,
	link_whole = True,
	exported_deps = [
	"fbsource//third-party/fmt:fmt",
	":torch-cpp-hip",
	"//caffe2/torch/fb/init:init",
	"//caffe2/torch/lib/libshm:libshm",
	],
	exported_external_deps = [
	("numpy", None, "cpp"),
	("pybind11", None),
	("python", None),
	],
	compiler_flags = compiler_flags_cpu + compiler_flags_hip + ["-Wno-unused-result"],
	compiler_specific_flags = common_flags["compiler_specific_flags"],
	)

	cpp_python_extension(
	name = "_C",
	srcs = [
	"torch/csrc/stub.c",
	],
	base_module = "torch",
	deps = [
	":_C_impl",
	"//caffe2:flatbuffer_loader",
	],
	)

	cpp_python_extension(
	name = "_C_flatbuffer",
	srcs = [
	"torch/csrc/stub_with_flatbuffer.c",
	"torch/csrc/init_flatbuffer_module.cpp",
	],
	base_module = "torch",
	deps = [
	":_C_impl",
	"//caffe2:flatbuffer_loader",
	"//caffe2:flatbuffer_serializer",
	],
	)

	return r