| # |
| # Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. |
| # |
| # See LICENCE.txt for license information |
| # |
| |
| # ---- User-tunable build options (each keeps a value supplied by the caller) ---- |
| # Toolkit root and installation prefix. |
| CUDA_HOME ?= /usr/local/cuda |
| PREFIX ?= /usr/local |
| # Switches; every one of them is compared against 0 further down. |
| DEBUG ?= 0 |
| VERBOSE ?= 0 |
| KEEP ?= 0 |
| PROFAPI ?= 0 |
| # Output tree, normalised to an absolute path. A BUILDDIR given on the make |
| # command line takes precedence over the second assignment and is used as typed. |
| BUILDDIR ?= build |
| BUILDDIR := $(abspath $(BUILDDIR)) |
| |
| # Locations inside the CUDA toolkit, derived from CUDA_HOME unless set individually. |
| NVCC ?= $(CUDA_HOME)/bin/nvcc |
| CUDA_INC ?= $(CUDA_HOME)/include |
| CUDA_LIB ?= $(CUDA_HOME)/lib64 |
| |
| # CUDA version detection. |
| # Unless the caller supplied CUDA_VERSION, take the last two dot-separated fields |
| # of the first libcudart.so.* name printed by ls in CUDA_LIB. |
| # Simple (:=) assignments are used so each shell pipeline runs exactly once at |
| # parse time; with recursive assignments they were re-run on every expansion, |
| # including once per recipe because CUDA_MAJOR/CUDA_MINOR are exported. |
| ifeq ($(origin CUDA_VERSION), undefined) |
| CUDA_VERSION := $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev) |
| endif |
| CUDA_MAJOR := $(shell echo $(CUDA_VERSION) | cut -d "." -f 1) |
| CUDA_MINOR := $(shell echo $(CUDA_VERSION) | cut -d "." -f 2) |
| |
| # Default code-generation targets; a caller-supplied NVCC_GENCODE wins. |
| # A toolkit whose major version is not 7 additionally targets sm_60/sm_61 and |
| # embeds compute_60; CUDA 7 stops at sm_52 and embeds compute_52. |
| ifneq ($(CUDA_MAJOR), 7) |
| NVCC_GENCODE ?= -gencode=arch=compute_30,code=sm_30 \ |
| -gencode=arch=compute_35,code=sm_35 \ |
| -gencode=arch=compute_50,code=sm_50 \ |
| -gencode=arch=compute_52,code=sm_52 \ |
| -gencode=arch=compute_60,code=sm_60 \ |
| -gencode=arch=compute_61,code=sm_61 \ |
| -gencode=arch=compute_60,code=compute_60 |
| else |
| NVCC_GENCODE ?= -gencode=arch=compute_30,code=sm_30 \ |
| -gencode=arch=compute_35,code=sm_35 \ |
| -gencode=arch=compute_50,code=sm_50 \ |
| -gencode=arch=compute_52,code=sm_52 \ |
| -gencode=arch=compute_52,code=compute_52 |
| endif |
| |
| # Baseline flags shared by every configuration. |
| # CUDA_LIB may hold several directories; each word becomes its own -L switch. |
| LDFLAGS := $(CUDA_LIB:%=-L%) -lcudart -lrt |
| # Host compiler: CUDA headers, position-independent code, symbols hidden by default. |
| CXXFLAGS := -I$(CUDA_INC) -fPIC -fvisibility=hidden |
| # nvcc: use $(CXX) as host compiler and build for the NVCC_GENCODE targets. |
| NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -lineinfo -std=c++11 -maxrregcount 96 |
| |
| # DEBUG: anything other than 0 selects an unoptimised build with debug info, |
| # 0 selects -O3 for both nvcc and the host compiler. |
| ifneq ($(DEBUG), 0) |
| NVCUFLAGS += -O0 -G |
| CXXFLAGS += -O0 -g -ggdb3 |
| else |
| NVCUFLAGS += -O3 |
| CXXFLAGS += -O3 |
| endif |
| |
| # VERBOSE: 0 silences recipe echoing for the whole file; any other value keeps |
| # the echo and turns on compiler warnings plus -v for ptxas. |
| ifeq ($(VERBOSE), 0) |
| .SILENT: |
| else |
| NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra |
| CXXFLAGS += -Wall -Wextra |
| endif |
| |
| # KEEP: non-zero passes -keep to nvcc. |
| ifneq ($(KEEP), 0) |
| NVCUFLAGS += -keep |
| endif |
| |
| # PROFAPI: non-zero defines the PROFAPI preprocessor macro. |
| ifneq ($(PROFAPI), 0) |
| CXXFLAGS += -DPROFAPI |
| endif |
| |
| # Library version; also used for the shared-object file name and soname. |
| NCCL_MAJOR := 1 |
| NCCL_MINOR := 3 |
| NCCL_PATCH := 4 |
| |
| # Expose the NCCL and CUDA version numbers to the sources as -D<NAME>=<value>. |
| CXXFLAGS += $(foreach v,NCCL_MAJOR NCCL_MINOR NCCL_PATCH CUDA_MAJOR CUDA_MINOR,-D$(v)=$($(v))) |
| |
| # Targets that name actions rather than files. |
| .PHONY : all lib staticlib clean test mpitest install deb debian debclean forlib fortest forclean |
| # `.DEFAULT` only supplies a fallback recipe for targets that have no rule; it |
| # does not choose what a bare `make` builds. .DEFAULT_GOAL states that intent. |
| .DEFAULT_GOAL := all |
| |
| # Public headers, library sources and output file names. |
| INCEXPORTS := nccl.h |
| LIBSRCFILES := libwrap.cu core.cu all_gather.cu all_reduce.cu broadcast.cu reduce.cu reduce_scatter.cu |
| LIBNAME := libnccl.so |
| STATICLIBNAME := libnccl_static.a |
| |
| # Layout of the build tree. |
| OBJDIR := $(BUILDDIR)/obj |
| LIBDIR := $(BUILDDIR)/lib |
| INCDIR := $(BUILDDIR)/include |
| |
| # Derived names: <LIBNAME>.<major> is the soname, <LIBNAME>.<major>.<minor>.<patch> the real file. |
| LIBSONAME := $(LIBNAME).$(NCCL_MAJOR) |
| LIBTARGET := $(LIBSONAME).$(NCCL_MINOR).$(NCCL_PATCH) |
| STATICLIBTARGET := $(STATICLIBNAME) |
| # Linker switch for the library: lib<name>.so becomes -l<name>. |
| LIBLINK := $(LIBNAME:lib%.so=-l%) |
| INCTARGETS := $(addprefix $(INCDIR)/,$(INCEXPORTS)) |
| # Only .cu entries of LIBSRCFILES produce objects. |
| LIBOBJ := $(addprefix $(OBJDIR)/,$(patsubst %.cu,%.o,$(filter %.cu,$(LIBSRCFILES)))) |
| # Dependency files to include. NOTE(review): TESTBINS and MPITESTBINS are assigned |
| # further down the file, so with := they expand to nothing at this point and only |
| # the library objects contribute. |
| DEPFILES := $(LIBOBJ:.o=.d) $(TESTBINS:%=%.d) $(MPITESTBINS:%=%.d) |
| |
| # Top-level goals: `all` builds both library flavours; each flavour also |
| # exports the public headers into the build tree first. |
| all : lib staticlib |
| |
| lib staticlib : $(INCTARGETS) |
| |
| lib : $(LIBDIR)/$(LIBTARGET) |
| |
| staticlib : $(LIBDIR)/$(STATICLIBTARGET) |
| |
| # Generated dependency rules; the leading '-' tolerates files that do not exist yet. |
| -include $(DEPFILES) |
| |
| # Link the versioned shared object, then refresh the symlink chain |
| #   $(LIBNAME) -> $(LIBSONAME) -> $(LIBTARGET) |
| # next to it. |
| $(LIBDIR)/$(LIBTARGET) : $(LIBOBJ) |
| @printf "Linking %-35s > %s\n" $(@F) $@ |
| mkdir -p $(@D) |
| $(CXX) $(CXXFLAGS) -shared -Wl,--no-as-needed -Wl,-soname,$(LIBSONAME) -o $@ $(LDFLAGS) $(LIBOBJ) |
| ln -sf $(LIBSONAME) $(@D)/$(LIBNAME) |
| ln -sf $(@F) $(@D)/$(LIBSONAME) |
| |
| # Archive the library objects into the static library. |
| # The old archive is removed first: `ar cr` only adds or replaces members, so |
| # objects dropped from LIBSRCFILES would otherwise linger in an existing archive. |
| # $(AR) (make's default is `ar`) lets callers substitute another archiver. |
| $(LIBDIR)/$(STATICLIBTARGET) : $(LIBOBJ) |
| @printf "Archiving %-35s > %s\n" $(STATICLIBTARGET) $@ |
| mkdir -p $(LIBDIR) |
| rm -f $@ |
| $(AR) cr $@ $(LIBOBJ) |
| |
| # Export a public header: copy src/<name>.h into the build tree's include directory. |
| $(INCDIR)/%.h : src/%.h |
| @printf "Grabbing %-35s > %s\n" $< $@ |
| mkdir -p $(@D) |
| cp -f $< $@ |
| |
| # Compile one library source with nvcc, then write <object>.d beside it: |
| #  1. `nvcc -M` dumps the raw dependency rule into <object>.d.tmp; |
| #  2. the first sed rewrites the target of that rule to the object's full path; |
| #  3. the second pipeline appends every prerequisite, one per line, as a target |
| #     with no prerequisites, so a header that disappears does not stop make; |
| #  4. the temporary file is removed. |
| $(OBJDIR)/%.o : src/%.cu |
| @printf "Compiling %-35s > %s\n" $< $@ |
| mkdir -p $(OBJDIR) |
| $(NVCC) -c $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -o $@ |
| @$(NVCC) -M $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< > $(basename $@).d.tmp |
| @sed "0,/^.*:/s//$(subst /,\/,$@):/" $(basename $@).d.tmp > $(basename $@).d |
| @sed -e 's/.*://' -e 's/\\$$//' < $(basename $@).d.tmp | fmt -1 | \ |
| sed -e 's/^ *//' -e 's/$$/:/' >> $(basename $@).d |
| @rm -f $(basename $@).d.tmp |
| |
| # Remove the whole build tree. |
| clean : |
| $(RM) -r $(BUILDDIR) |
| |
| # Copy everything in the build tree's lib and include directories under $(PREFIX). |
| # cp -P copies the library symlinks as symlinks instead of following them. |
| install : lib |
| mkdir -p $(PREFIX)/lib $(PREFIX)/include |
| cp -P -v $(LIBDIR)/* $(PREFIX)/lib/ |
| cp -v $(INCDIR)/* $(PREFIX)/include/ |
| |
| |
| #### TESTS #### |
| |
| TEST_ONLY ?= 0 |
| |
| # Tests depend on lib, except in TEST_ONLY mode. |
| ifeq ($(TEST_ONLY), 0) |
| TSTDEP = $(INCTARGETS) $(LIBDIR)/$(LIBTARGET) |
| endif |
| |
| # Where the tests find NCCL; defaults to the build tree. |
| NCCL_LIB ?= $(LIBDIR) |
| NCCL_INC ?= $(INCDIR) |
| |
| # MPI installation used by the MPI test. |
| MPI_HOME ?= /usr |
| MPI_INC ?= $(MPI_HOME)/include |
| MPI_LIB ?= $(MPI_HOME)/lib |
| MPIFLAGS := -I$(MPI_INC) -L$(MPI_LIB) -lmpi |
| |
| TESTS := all_gather_test all_gather_scan \ |
| all_reduce_test all_reduce_scan \ |
| broadcast_test broadcast_scan \ |
| reduce_test reduce_scan \ |
| reduce_scatter_test reduce_scatter_scan |
| MPITESTS := mpi_test |
| |
| # Include/link switches and output locations for the test binaries. |
| TSTINC := -I$(NCCL_INC) -Itest/include |
| TSTLIB := -L$(NCCL_LIB) $(LIBLINK) $(LDFLAGS) |
| TSTDIR := $(BUILDDIR)/test/single |
| MPITSTDIR := $(BUILDDIR)/test/mpi |
| TESTBINS := $(patsubst %, $(TSTDIR)/%, $(TESTS)) |
| MPITESTBINS:= $(patsubst %, $(MPITSTDIR)/%, $(MPITESTS)) |
| |
| # Read the <binary>.d dependency files that the test rules write next to each |
| # test binary. DEPFILES is expanded near the top of the file, before TESTBINS |
| # and MPITESTBINS are assigned, so without this line those files are never used. |
| -include $(addsuffix .d, $(TESTBINS) $(MPITESTBINS)) |
| |
| # Build every single-process test binary. |
| test : $(TESTBINS) |
| |
| # One test binary per test/single/<name>.cu; any header in test/include and |
| # (unless TEST_ONLY is set) the library itself are prerequisites. After linking, |
| # the nvcc -M output is post-processed into <binary>.d: first rule retargeted to |
| # the binary, then each prerequisite appended as a target with no prerequisites. |
| $(TSTDIR)/% : test/single/%.cu test/include/*.h $(TSTDEP) |
| @printf "Building %-35s > %s\n" $< $@ |
| mkdir -p $(TSTDIR) |
| $(NVCC) $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< $(TSTLIB) -lcuda -lcurand -lnvToolsExt |
| @$(NVCC) -M $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< $(TSTLIB) -lcuda -lcurand -lnvToolsExt > $@.d.tmp |
| @sed "0,/^.*:/s//$(subst /,\/,$@):/" $@.d.tmp > $@.d |
| @sed -e 's/.*://' -e 's/\\$$//' < $@.d.tmp | fmt -1 | \ |
| sed -e 's/^ *//' -e 's/$$/:/' >> $@.d |
| @rm -f $@.d.tmp |
| |
| # Build every MPI test binary. |
| mpitest : $(MPITESTBINS) |
| |
| # One MPI test binary per test/mpi/<name>.cu, compiled and linked with MPIFLAGS. |
| # The dependency file <binary>.d is produced the same way as for the other |
| # nvcc rules in this file: nvcc -M output, first target rewritten to the binary, |
| # every prerequisite appended as a target with no prerequisites. |
| $(MPITSTDIR)/% : test/mpi/%.cu $(TSTDEP) |
| @printf "Building %-35s > %s\n" $< $@ |
| mkdir -p $(MPITSTDIR) |
| $(NVCC) $(MPIFLAGS) $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< $(TSTLIB) -lcurand |
| @$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< $(TSTLIB) -lcurand > $@.d.tmp |
| @sed "0,/^.*:/s//$(subst /,\/,$@):/" $@.d.tmp > $@.d |
| @sed -e 's/.*://' -e 's/\\$$//' < $@.d.tmp | fmt -1 | \ |
| sed -e 's/^ *//' -e 's/$$/:/' >> $@.d |
| @rm -f $@.d.tmp |
| |
| #### PACKAGING #### |
| |
| # Staging directory for the Debian packaging files. |
| DEBIANDIR := $(BUILDDIR)/debian |
| |
| # Templates (debian/*.in) and the file names generated from them. |
| # $(wildcard) is used instead of `cd debian ; ls *.in`: it needs no shell and |
| # cannot fall back to listing the current directory when debian/ is missing. |
| DEBGEN_IN := $(notdir $(wildcard debian/*.in)) |
| DEBGEN := $(DEBGEN_IN:.in=) |
| # Files copied verbatim plus the generated ones. |
| DEBFILES := compat copyright libnccl-dev.install libnccl-dev.manpages nccl.7 rules $(DEBGEN) |
| DEBTARGETS := $(patsubst %, $(DEBIANDIR)/%, $(DEBFILES)) |
| |
| # Values substituted into the templates. |
| DEB_REVISION ?= 1 |
| DEB_TIMESTAMP := $(shell date -R) |
| DEB_ARCH ?= amd64 |
| |
| # Populate $(DEBIANDIR) with the copied and generated packaging files. |
| debian : $(DEBTARGETS) |
| |
| # Build the Debian package from the build tree and collect the resulting |
| # libnccl*.deb files (debuild leaves them in the parent directory) in $(BUILDDIR)/deb. |
| # `cd ... &&` guarantees debuild never runs in the wrong directory if cd fails. |
| deb : lib debian |
| @printf "Building Debian package\n" |
| cd $(BUILDDIR) && debuild -eLD_LIBRARY_PATH -uc -us -d -b |
| mkdir -p $(BUILDDIR)/deb/ |
| mv $(BUILDDIR)/../libnccl*.deb $(BUILDDIR)/deb/ |
| |
| # Remove only the staged packaging files. |
| debclean : |
| rm -Rf $(DEBIANDIR) |
| |
| # Generate a packaging file from its debian/<name>.in template by replacing the |
| # $${nccl:*}, $${cuda:*} and $${deb:*} placeholders with the values known here. |
| # The staging directory is created first: this rule may run before the plain |
| # copy rule (for example under -j), and the output redirection needs it to exist. |
| $(DEBIANDIR)/% : debian/%.in |
| @printf "Generating %-35s > %s\n" $< $@ |
| mkdir -p $(DEBIANDIR) |
| sed -e "s/\$${nccl:Major}/$(NCCL_MAJOR)/g" \ |
| -e "s/\$${nccl:Minor}/$(NCCL_MINOR)/g" \ |
| -e "s/\$${nccl:Patch}/$(NCCL_PATCH)/g" \ |
| -e "s/\$${cuda:Major}/$(CUDA_MAJOR)/g" \ |
| -e "s/\$${cuda:Minor}/$(CUDA_MINOR)/g" \ |
| -e "s/\$${deb:Revision}/$(DEB_REVISION)/g" \ |
| -e "s/\$${deb:Timestamp}/$(DEB_TIMESTAMP)/g" \ |
| -e "s/\$${deb:Arch}/$(DEB_ARCH)/g" \ |
| $< > $@ |
| |
| # Packaging files without a template are copied into the staging directory unchanged. |
| $(DEBIANDIR)/% : debian/% |
| @printf "Grabbing %-35s > %s\n" $< $@ |
| mkdir -p $(@D) |
| cp -f $< $@ |
| |
| #### FORTRAN BINDINGS #### |
| |
| # Variables handed to the fortran sub-make through the environment. |
| export NCCL_MAJOR NCCL_MINOR NCCL_PATCH |
| export CUDA_MAJOR CUDA_MINOR CUDA_LIB |
| export LIBLINK BUILDDIR |
| |
| # Each Fortran target delegates to the makefile in fortran/. The bindings need |
| # the shared library first, and their tests need the bindings. |
| forlib : lib |
| $(MAKE) --directory=fortran lib |
| |
| fortest : forlib |
| $(MAKE) --directory=fortran test |
| |
| forclean : |
| $(MAKE) --directory=fortran clean |
| |