blob: b472add39cf575e72977a0b8585bf00b73a59406 [file] [log] [blame]
# NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication.
licenses(["notice"])
exports_files(["LICENSE.txt"])
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load(
"@local_config_nccl//:build_defs.bzl",
"cuda_rdc_library",
"gen_device_srcs",
)
cc_library(
name = "src_hdrs",
hdrs = [
"src/include/collectives.h",
"src/nccl.h",
],
strip_include_prefix = "src",
)
cc_library(
name = "include_hdrs",
hdrs = glob(["src/include/**"]),
strip_include_prefix = "src/include",
deps = ["@local_config_cuda//cuda:cuda_headers"],
)
cc_library(
name = "device_hdrs",
hdrs = glob(["src/collectives/device/*.h"]),
strip_include_prefix = "src/collectives/device",
)
# NCCL compiles the same source files with different NCCL_OP/NCCL_TYPE defines.
# RDC compilation requires that each compiled module has a unique ID. Clang
# derives the module ID from the path only so we need to copy the files to get
# different IDs for different parts of compilation. NVCC does not have that
# problem because it generates IDs based on preprocessed content.
gen_device_srcs(
name = "device_srcs",
srcs = [
"src/collectives/device/all_gather.cu.cc",
"src/collectives/device/all_reduce.cu.cc",
"src/collectives/device/broadcast.cu.cc",
"src/collectives/device/reduce.cu.cc",
"src/collectives/device/reduce_scatter.cu.cc",
"src/collectives/device/sendrecv.cu.cc",
],
)
cuda_rdc_library(
name = "device",
srcs = [
"src/collectives/device/functions.cu.cc",
"src/collectives/device/onerank_reduce.cu.cc",
":device_srcs",
] + glob([
# Required for header inclusion checking, see below for details.
"src/collectives/device/*.h",
"src/nccl.h",
]),
deps = [
":device_hdrs",
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
)
cc_library(
name = "net",
srcs = [
"src/transport/coll_net.cc",
"src/transport/net.cc",
],
include_prefix = "third_party/nccl/src",
linkopts = select({
"@org_tensorflow//tensorflow:macos": [],
"//conditions:default": ["-lrt"],
}),
deps = [
":include_hdrs",
":src_hdrs",
],
)
# Primary NCCL target.
#
# This needs to be cuda_library instead of cc_library so that clang uses the
# correct name for kernel host stubs (function pointers to initialize ncclKerns
# in enqueue.cc) after https://reviews.llvm.org/D68578.
cuda_library(
name = "nccl",
srcs = glob(
include = [
"src/**/*.cc",
# Required for header inclusion checking, see below for details.
"src/graph/*.h",
],
# Exclude device-library code.
exclude = [
"src/collectives/device/**",
"src/transport/coll_net.cc",
"src/transport/net.cc",
],
) + [
# Required for header inclusion checking (see
# http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
# Files in src/ which #include "nccl.h" load it from there rather than
# from the virtual includes directory.
"src/include/collectives.h",
"src/nccl.h",
],
hdrs = ["src/nccl.h"],
include_prefix = "third_party/nccl",
linkopts = select({
"@org_tensorflow//tensorflow:macos": [],
"//conditions:default": ["-lrt"],
}),
strip_include_prefix = "src",
visibility = ["//visibility:public"],
deps = [
":device",
":include_hdrs",
":net",
":src_hdrs",
],
)