| import copy |
| import glob |
| import imp |
| import os |
| import re |
| import setuptools |
| import subprocess |
| import sys |
| import sysconfig |
| import tempfile |
| import warnings |
| |
| import torch |
| from .file_baton import FileBaton |
| |
| from setuptools.command.build_ext import build_ext |
| |
| |
| def _find_cuda_home(): |
| '''Finds the CUDA install path.''' |
| # Guess #1 |
| cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') |
| if cuda_home is None: |
| # Guess #2 |
| if sys.platform == 'win32': |
| cuda_home = glob.glob( |
| 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') |
| else: |
| cuda_home = '/usr/local/cuda' |
| if not os.path.exists(cuda_home): |
| # Guess #3 |
| try: |
| which = 'where' if sys.platform == 'win32' else 'which' |
| nvcc = subprocess.check_output( |
| [which, 'nvcc']).decode().rstrip('\r\n') |
| cuda_home = os.path.dirname(os.path.dirname(nvcc)) |
| except Exception: |
| cuda_home = None |
| return cuda_home |
| |
| |
# Lowest GCC (major, minor) version accepted by the ABI compatibility check.
MINIMUM_GCC_VERSION = (4, 9)
# Lowest MSVC (major, minor, revision) version accepted by the ABI check.
MINIMUM_MSVC_VERSION = (19, 0, 24215)
# Warning text emitted by the ABI check; the single ``{}`` placeholder is
# filled with the compiler name (and detected version, when known).
ABI_INCOMPATIBILITY_WARNING = '''

!! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({}) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

!! WARNING !!
'''
# CUDA install root. The lookup is only attempted when torch reports that CUDA
# is available; otherwise the value stays None.
if torch.cuda.is_available():
    CUDA_HOME = _find_cuda_home()
else:
    CUDA_HOME = None
| |
| |
def check_compiler_abi_compatibility(compiler):
    '''
    Verifies that the given compiler is ABI-compatible with PyTorch.

    The compiler is invoked (with ``--version`` everywhere except on Windows)
    and its output is searched for a GCC or MSVC version number, which is
    compared against ``MINIMUM_GCC_VERSION`` / ``MINIMUM_MSVC_VERSION``.
    Whenever compatibility cannot be established, a warning is issued.

    Arguments:
        compiler (str): The compiler executable name to check (e.g. ``g++``).
            Must be executable in a shell process.

    Returns:
        False if the compiler is (likely) ABI-incompatible with PyTorch,
        else True.
    '''
    try:
        check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
        info = subprocess.check_output(
            check_cmd.format(compiler).split(), stderr=subprocess.STDOUT)
    except Exception as error:
        # Not fatal: fall through to the generic incompatibility warning.
        warnings.warn('Error checking compiler version: {}'.format(error))
    else:
        # Decode and lower-case exactly once; every vendor check below must
        # therefore compare against lower-case markers.
        info = info.decode().lower()
        if 'gcc' in info or 'g++' in info:
            # Sometimes the version is given as "major.x" instead of semver.
            version = re.search(r'(\d+)\.(\d+|x)', info)
            if version is not None:
                major, minor = version.groups()
                minor = 0 if minor == 'x' else int(minor)
                if (int(major), minor) >= MINIMUM_GCC_VERSION:
                    return True
                else:
                    # Append the detected version for the warning.
                    compiler = '{} {}'.format(compiler, version.group(0))
        elif 'microsoft' in info:
            version = re.search(r'(\d+)\.(\d+)\.(\d+)', info)
            if version is not None:
                major, minor, revision = version.groups()
                if (int(major), int(minor),
                        int(revision)) >= MINIMUM_MSVC_VERSION:
                    return True
                else:
                    # Append the detected version for the warning.
                    compiler = '{} {}'.format(compiler, version.group(0))

    warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler))
    return False
| |
| |
class BuildExtension(build_ext):
    '''
    A custom :mod:`setuptools` build extension.

    This :class:`setuptools.build_ext` subclass takes care of passing the
    minimum required compiler flags (e.g. ``-std=c++11``) as well as mixed
    C++/CUDA compilation (and support for CUDA files in general).

    When using :class:`BuildExtension`, it is allowed to supply a dictionary
    for ``extra_compile_args`` (rather than the usual list) that maps from
    languages (``cxx`` or ``nvcc``) to a list of additional compiler flags to
    supply to the compiler. This makes it possible to supply different flags to
    the C++ and CUDA compiler during mixed compilation.
    '''

    def build_extensions(self):
        '''
        Builds all extensions with CUDA-aware compile hooks installed.

        Checks the compiler ABI, defines ``TORCH_EXTENSION_NAME`` for every
        extension, registers ``.cu``/``.cuh`` as source extensions and
        monkey-patches the compiler so that CUDA sources are handed to nvcc,
        then defers to ``build_ext.build_extensions``.
        '''
        self._check_abi()
        for extension in self.extensions:
            self._define_torch_extension_name(extension)

        # Register .cu and .cuh as valid source extensions.
        self.compiler.src_extensions += ['.cu', '.cuh']
        # Save the original compile hooks for later.
        if self.compiler.compiler_type == 'msvc':
            self.compiler._cpp_extensions += ['.cu', '.cuh']
            original_compile = self.compiler.compile
            original_spawn = self.compiler.spawn
        else:
            original_compile = self.compiler._compile

        def unix_wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
            # Copy before we make any modifications.
            cflags = copy.deepcopy(extra_postargs)
            # Remember the compiler before entering the try block so that the
            # finally clause always has something to restore.
            original_compiler = self.compiler.compiler_so
            try:
                if _is_cuda_file(src):
                    nvcc = _join_cuda_home('bin', 'nvcc')
                    self.compiler.set_executable('compiler_so', nvcc)
                    if isinstance(cflags, dict):
                        cflags = cflags['nvcc']
                    cflags += ['--compiler-options', "'-fPIC'"]
                elif isinstance(cflags, dict):
                    cflags = cflags['cxx']
                # NVCC does not allow multiple -std to be passed, so we avoid
                # overriding the option if the user explicitly passed it.
                if not any(flag.startswith('-std=') for flag in cflags):
                    cflags.append('-std=c++11')

                original_compile(obj, src, ext, cc_args, cflags, pp_opts)
            finally:
                # Put the original compiler back in place.
                self.compiler.set_executable('compiler_so', original_compiler)

        def win_wrap_compile(sources,
                             output_dir=None,
                             macros=None,
                             include_dirs=None,
                             debug=0,
                             extra_preargs=None,
                             extra_postargs=None,
                             depends=None):
            # The extra flags are added per command inside spawn() below, so
            # they are stashed on self and withheld from the original compile.
            self.cflags = copy.deepcopy(extra_postargs)
            extra_postargs = None

            def spawn(cmd):
                # Using regex to match src, obj and include files
                src_regex = re.compile('/T(p|c)(.*)')
                src_list = [
                    m.group(2) for m in (src_regex.match(elem) for elem in cmd)
                    if m
                ]

                obj_regex = re.compile('/Fo(.*)')
                obj_list = [
                    m.group(1) for m in (obj_regex.match(elem) for elem in cmd)
                    if m
                ]

                include_regex = re.compile(r'((\-|\/)I.*)')
                include_list = [
                    m.group(1)
                    for m in (include_regex.match(elem) for elem in cmd) if m
                ]

                # Only commands that name both a source and an object file
                # are compile commands; anything else is passed through.
                if len(src_list) >= 1 and len(obj_list) >= 1:
                    src = src_list[0]
                    obj = obj_list[0]
                    if _is_cuda_file(src):
                        nvcc = _join_cuda_home('bin', 'nvcc')
                        if isinstance(self.cflags, dict):
                            cflags = self.cflags['nvcc']
                        elif isinstance(self.cflags, list):
                            cflags = self.cflags
                        else:
                            cflags = []
                        # Replace the whole command with an nvcc invocation.
                        cmd = [
                            nvcc, '-c', src, '-o', obj, '-Xcompiler',
                            '/wd4819', '-Xcompiler', '/MD'
                        ] + include_list + cflags
                    elif isinstance(self.cflags, dict):
                        cmd = cmd + self.cflags['cxx']
                    elif isinstance(self.cflags, list):
                        cmd = cmd + self.cflags

                return original_spawn(cmd)

            try:
                self.compiler.spawn = spawn
                return original_compile(sources, output_dir, macros,
                                        include_dirs, debug, extra_preargs,
                                        extra_postargs, depends)
            finally:
                self.compiler.spawn = original_spawn

        # Monkey-patch the compile hook of the active compiler.
        if self.compiler.compiler_type == 'msvc':
            self.compiler.compile = win_wrap_compile
        else:
            self.compiler._compile = unix_wrap_compile

        build_ext.build_extensions(self)

    def _check_abi(self):
        '''Runs the ABI compatibility check on the C++ compiler in use.'''
        # On some platforms, like Windows, compiler_cxx is not available.
        if hasattr(self.compiler, 'compiler_cxx'):
            compiler = self.compiler.compiler_cxx[0]
        elif sys.platform == 'win32':
            compiler = os.environ.get('CXX', 'cl')
        else:
            compiler = os.environ.get('CXX', 'c++')
        check_compiler_abi_compatibility(compiler)

    def _define_torch_extension_name(self, extension):
        '''
        Appends ``-DTORCH_EXTENSION_NAME=<name>`` to the extension's flags.

        ``extra_compile_args`` may be a list or a dict of lists (one per
        language); in the latter case the define is added to every list.
        '''
        # An extension that lives inside a package has a dotted name (e.g.
        # ``pkg.ext``). A dotted value cannot serve as a C++ identifier, so
        # only the last component is used for the macro.
        module_name = extension.name.split('.')[-1]
        define = '-DTORCH_EXTENSION_NAME={}'.format(module_name)
        if isinstance(extension.extra_compile_args, dict):
            for args in extension.extra_compile_args.values():
                args.append(define)
        else:
            extension.extra_compile_args.append(define)
| |
| |
def CppExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a C++ extension.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor. The ``include_dirs`` keyword (and, on Windows,
    ``library_dirs`` and ``libraries``) is extended with the entries needed
    by PyTorch; ``language`` is always set to ``'c++'``. Lists passed in by
    the caller are not modified.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CppExtension
        >>> setup(
                name='extension',
                ext_modules=[
                    CppExtension(
                        name='extension',
                        sources=['extension.cpp'],
                        extra_compile_args=['-g']),
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    # Build fresh lists rather than extending the caller's in place, so that
    # a list shared between several extensions does not accumulate entries.
    include_dirs = list(kwargs.get('include_dirs') or [])
    kwargs['include_dirs'] = include_dirs + include_paths()

    if sys.platform == 'win32':
        library_dirs = list(kwargs.get('library_dirs') or [])
        kwargs['library_dirs'] = library_dirs + library_paths()

        libraries = list(kwargs.get('libraries') or [])
        kwargs['libraries'] = libraries + ['ATen', '_C']

    kwargs['language'] = 'c++'
    return setuptools.Extension(name, sources, *args, **kwargs)
| |
| |
def CUDAExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for CUDA/C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a CUDA/C++
    extension. This includes the CUDA include path, library path and runtime
    library.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor. Lists passed in by the caller are not modified.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
        >>> setup(
                name='cuda_extension',
                ext_modules=[
                    CUDAExtension(
                            name='cuda_extension',
                            sources=['extension.cpp', 'extension_kernel.cu'],
                            extra_compile_args={'cxx': ['-g'],
                                                'nvcc': ['-O2']})
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    # Build fresh lists rather than extending the caller's in place, so that
    # a list shared between several extensions does not accumulate entries.
    library_dirs = list(kwargs.get('library_dirs') or [])
    kwargs['library_dirs'] = library_dirs + library_paths(cuda=True)

    libraries = list(kwargs.get('libraries') or [])
    libraries.append('cudart')
    if sys.platform == 'win32':
        libraries.append('ATen')
        libraries.append('_C')
    kwargs['libraries'] = libraries

    include_dirs = list(kwargs.get('include_dirs') or [])
    kwargs['include_dirs'] = include_dirs + include_paths(cuda=True)

    kwargs['language'] = 'c++'

    return setuptools.Extension(name, sources, *args, **kwargs)
| |
| |
def include_paths(cuda=False):
    '''
    Get the include paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, the CUDA include directory is appended as well.

    Returns:
        A list of include path strings.
    '''
    # The directory two levels above this file is taken as the torch root.
    torch_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    include_root = os.path.join(torch_root, 'lib', 'include')
    # Some internal (old) Torch headers don't properly prefix their includes,
    # so the TH and THC sub-directories are listed explicitly too.
    result = [include_root]
    for legacy_dir in ('TH', 'THC'):
        result.append(os.path.join(include_root, legacy_dir))
    if cuda:
        result.append(_join_cuda_home('include'))
    return result
| |
| |
def library_paths(cuda=False):
    '''
    Get the library paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, the CUDA library directory is appended as well.

    Returns:
        A list of library path strings.
    '''
    on_windows = sys.platform == 'win32'
    result = []

    if on_windows:
        # Only on Windows is torch's own lib directory added.
        module_file = os.path.abspath(__file__)
        torch_root = os.path.dirname(os.path.dirname(module_file))
        result.append(os.path.join(torch_root, 'lib'))

    if cuda:
        if on_windows:
            cuda_lib_dir = 'lib/x64'
        else:
            cuda_lib_dir = 'lib64'
        result.append(_join_cuda_home(cuda_lib_dir))
    return result
| |
| |
def load(name,
         sources,
         extra_cflags=None,
         extra_cuda_cflags=None,
         extra_ldflags=None,
         extra_include_paths=None,
         build_directory=None,
         verbose=False):
    '''
    Loads a PyTorch C++ extension just-in-time (JIT).

    A Ninja build file is written and used to compile the given sources into
    a dynamic library, which is then imported into the current Python process
    as a module and returned.

    Unless told otherwise, the build file and the compiled library are placed
    in ``<tmp>/torch_extensions/<name>``, with ``<tmp>`` being the platform's
    temporary folder and ``<name>`` the extension name. Setting the
    ``TORCH_EXTENSIONS_DIR`` environment variable replaces
    ``<tmp>/torch_extensions``; passing ``build_directory`` replaces the whole
    path, so the library is compiled directly into that folder.

    Sources are compiled with the default system compiler (``c++``) unless
    the ``CXX`` environment variable names another one. ``extra_cflags`` and
    ``extra_ldflags`` forward additional arguments to compilation and
    linking, e.g. ``extra_cflags=['-O3']``.

    CUDA sources (``.cu`` or ``.cuh``) may be mixed with C++ sources. They
    are detected and compiled with nvcc, the CUDA library directory is added
    and ``cudart`` is linked. ``extra_cuda_cflags`` forwards flags to nvcc.
    If the CUDA install directory is not found automatically, set the
    ``CUDA_HOME`` environment variable.

    Only one caller at a time builds in a given directory: the build is
    guarded by a lock file inside it, and callers that do not obtain the lock
    wait for it before importing the module.

    Args:
        name: The name of the extension to build. This MUST be the same as the
            name of the pybind11 module!
        sources: A single path, or a list of relative or absolute paths, to
            the source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        build_directory: optional path to use as build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.

    Returns:
        The loaded PyTorch extension as a Python module.

    Example:
        >>> from torch.utils.cpp_extension import load
        >>> module = load(
                name='extension',
                sources=['extension.cpp', 'extension_kernel.cu'],
                extra_cflags=['-O2'],
                verbose=True)
    '''

    verify_ninja_availability()

    # Allows sources to be a single path or a list of paths.
    source_list = [sources] if isinstance(sources, str) else sources

    workspace = build_directory
    if workspace is None:
        workspace = _get_build_directory(name, verbose)

    baton = FileBaton(os.path.join(workspace, 'lock'))

    if not baton.try_acquire():
        # Somebody else holds the lock; wait for them to finish.
        baton.wait()
    else:
        try:
            with_cuda = any(_is_cuda_file(source) for source in source_list)
            ldflags = _prepare_ldflags(
                extra_ldflags or [],
                with_cuda,
                verbose)
            ninja_path = os.path.join(workspace, 'build.ninja')
            if verbose:
                print(
                    'Emitting ninja build file {}...'.format(ninja_path))
            # NOTE: Emitting a new ninja build file does not cause
            # re-compilation if the sources did not change, so it's ok to
            # re-emit (and it's fast).
            _write_ninja_file(
                path=ninja_path,
                name=name,
                sources=source_list,
                extra_cflags=extra_cflags or [],
                extra_cuda_cflags=extra_cuda_cflags or [],
                extra_ldflags=ldflags or [],
                extra_include_paths=extra_include_paths or [],
                with_cuda=with_cuda)

            if verbose:
                print('Building extension module {}...'.format(name))
            _build_extension_module(name, workspace)
        finally:
            baton.release()

    if verbose:
        print('Loading extension module {}...'.format(name))
    return _import_module_from_library(name, workspace)
| |
| |
def verify_ninja_availability():
    '''
    Returns ``True`` if the `ninja <https://ninja-build.org/>`_ build system is
    available on the system.

    Availability is probed by running ``ninja --version`` with its standard
    output discarded.

    Raises:
        RuntimeError: if the ``ninja`` executable cannot be launched.
    '''
    with open(os.devnull, 'wb') as devnull:
        try:
            subprocess.check_call('ninja --version'.split(), stdout=devnull)
        except OSError:
            raise RuntimeError("Ninja is required to load C++ extensions")
    # Return the value the docstring promises instead of an implicit None.
    return True
| |
| |
| def _prepare_ldflags(extra_ldflags, with_cuda, verbose): |
| if sys.platform == 'win32': |
| python_path = os.path.dirname(sys.executable) |
| python_lib_path = os.path.join(python_path, 'libs') |
| |
| here = os.path.abspath(__file__) |
| torch_path = os.path.dirname(os.path.dirname(here)) |
| lib_path = os.path.join(torch_path, 'lib') |
| |
| extra_ldflags.append('ATen.lib') |
| extra_ldflags.append('_C.lib') |
| extra_ldflags.append('/LIBPATH:{}'.format(python_lib_path)) |
| extra_ldflags.append('/LIBPATH:{}'.format(lib_path)) |
| |
| if with_cuda: |
| if verbose: |
| print('Detected CUDA files, patching ldflags') |
| if sys.platform == 'win32': |
| extra_ldflags.append('/LIBPATH:{}'.format( |
| _join_cuda_home('lib/x64'))) |
| extra_ldflags.append('cudart.lib') |
| else: |
| extra_ldflags.append('-L{}'.format(_join_cuda_home('lib64'))) |
| extra_ldflags.append('-lcudart') |
| |
| return extra_ldflags |
| |
| |
| def _get_build_directory(name, verbose): |
| root_extensions_directory = os.environ.get('TORCH_EXTENSIONS_DIR') |
| if root_extensions_directory is None: |
| # tempfile.gettempdir() will be /tmp on UNIX and \TEMP on Windows. |
| root_extensions_directory = os.path.join(tempfile.gettempdir(), |
| 'torch_extensions') |
| |
| if verbose: |
| print('Using {} as PyTorch extensions root...'.format( |
| root_extensions_directory)) |
| |
| build_directory = os.path.join(root_extensions_directory, name) |
| if not os.path.exists(build_directory): |
| if verbose: |
| print('Creating extension directory {}...'.format(build_directory)) |
| # This is like mkdir -p, i.e. will also create parent directories. |
| os.makedirs(build_directory) |
| |
| return build_directory |
| |
| |
| def _build_extension_module(name, build_directory): |
| try: |
| subprocess.check_output( |
| ['ninja', '-v'], stderr=subprocess.STDOUT, cwd=build_directory) |
| except subprocess.CalledProcessError: |
| # Python 2 and 3 compatible way of getting the error object. |
| _, error, _ = sys.exc_info() |
| # error.output contains the stdout and stderr of the build attempt. |
| raise RuntimeError("Error building extension '{}': {}".format( |
| name, error.output.decode())) |
| |
| |
| def _import_module_from_library(module_name, path): |
| # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path |
| file, path, description = imp.find_module(module_name, [path]) |
| # Close the .so file after load. |
| with file: |
| return imp.load_module(module_name, file, path, description) |
| |
| |
| def _write_ninja_file(path, |
| name, |
| sources, |
| extra_cflags, |
| extra_cuda_cflags, |
| extra_ldflags, |
| extra_include_paths, |
| with_cuda=False): |
| # Version 1.3 is required for the `deps` directive. |
| config = ['ninja_required_version = 1.3'] |
| config.append('cxx = {}'.format(os.environ.get('CXX', 'c++'))) |
| if with_cuda: |
| config.append('nvcc = {}'.format(_join_cuda_home('bin', 'nvcc'))) |
| |
| # Turn into absolute paths so we can emit them into the ninja build |
| # file wherever it is. |
| sources = [os.path.abspath(file) for file in sources] |
| includes = [os.path.abspath(file) for file in extra_include_paths] |
| |
| # include_paths() gives us the location of torch/torch.h |
| includes += include_paths(with_cuda) |
| # sysconfig.get_paths()['include'] gives us the location of Python.h |
| includes.append(sysconfig.get_paths()['include']) |
| |
| common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)] |
| common_cflags += ['-I{}'.format(include) for include in includes] |
| |
| cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags |
| if sys.platform == 'win32': |
| from distutils.spawn import _nt_quote_args |
| cflags = _nt_quote_args(cflags) |
| flags = ['cflags = {}'.format(' '.join(cflags))] |
| |
| if with_cuda: |
| cuda_flags = common_cflags |
| if sys.platform == 'win32': |
| cuda_flags = _nt_quote_args(cuda_flags) |
| else: |
| cuda_flags += ['--compiler-options', "'-fPIC'"] |
| cuda_flags += extra_cuda_cflags |
| if not any(flag.startswith('-std=') for flag in cuda_flags): |
| cuda_flags.append('-std=c++11') |
| |
| flags.append('cuda_flags = {}'.format(' '.join(cuda_flags))) |
| |
| if sys.platform == 'win32': |
| ldflags = ['/DLL'] + extra_ldflags |
| else: |
| ldflags = ['-shared'] + extra_ldflags |
| # The darwin linker needs explicit consent to ignore unresolved symbols. |
| if sys.platform == 'darwin': |
| ldflags.append('-undefined dynamic_lookup') |
| elif sys.platform == 'win32': |
| ldflags = _nt_quote_args(ldflags) |
| flags.append('ldflags = {}'.format(' '.join(ldflags))) |
| |
| # See https://ninja-build.org/build.ninja.html for reference. |
| compile_rule = ['rule compile'] |
| if sys.platform == 'win32': |
| compile_rule.append( |
| ' command = cl /showIncludes $cflags -c $in /Fo$out') |
| compile_rule.append(' deps = msvc') |
| else: |
| compile_rule.append( |
| ' command = $cxx -MMD -MF $out.d $cflags -c $in -o $out') |
| compile_rule.append(' depfile = $out.d') |
| compile_rule.append(' deps = gcc') |
| |
| if with_cuda: |
| cuda_compile_rule = ['rule cuda_compile'] |
| cuda_compile_rule.append( |
| ' command = $nvcc $cuda_flags -c $in -o $out') |
| |
| link_rule = ['rule link'] |
| if sys.platform == 'win32': |
| cl_paths = subprocess.check_output(['where', |
| 'cl']).decode().split('\r\n') |
| if len(cl_paths) >= 1: |
| cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:') |
| else: |
| raise RuntimeError("MSVC is required to load C++ extensions") |
| link_rule.append( |
| ' command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format( |
| cl_path)) |
| else: |
| link_rule.append(' command = $cxx $ldflags $in -o $out') |
| |
| # Emit one build rule per source to enable incremental build. |
| object_files = [] |
| build = [] |
| for source_file in sources: |
| # '/path/to/file.cpp' -> 'file' |
| file_name = os.path.splitext(os.path.basename(source_file))[0] |
| if _is_cuda_file(source_file): |
| rule = 'cuda_compile' |
| # Use a different object filename in case a C++ and CUDA file have |
| # the same filename but different extension (.cpp vs. .cu). |
| target = '{}.cuda.o'.format(file_name) |
| else: |
| rule = 'compile' |
| target = '{}.o'.format(file_name) |
| object_files.append(target) |
| if sys.platform == 'win32': |
| source_file = source_file.replace(':', '$:') |
| build.append('build {}: {} {}'.format(target, rule, source_file)) |
| |
| ext = '.pyd' if sys.platform == 'win32' else '.so' |
| library_target = '{}{}'.format(name, ext) |
| link = ['build {}: link {}'.format(library_target, ' '.join(object_files))] |
| |
| default = ['default {}'.format(library_target)] |
| |
| # 'Blocks' should be separated by newlines, for visual benefit. |
| blocks = [config, flags, compile_rule] |
| if with_cuda: |
| blocks.append(cuda_compile_rule) |
| blocks += [link_rule, build, link, default] |
| with open(path, 'w') as build_file: |
| for block in blocks: |
| lines = '\n'.join(block) |
| build_file.write('{}\n\n'.format(lines)) |
| |
| |
def _join_cuda_home(*paths):
    '''
    Joins paths with CUDA_HOME, or raises an error if CUDA_HOME is not set.

    Raising here, rather than when the module is imported, means a missing
    CUDA install is only reported once a CUDA-specific path is requested.
    '''
    if CUDA_HOME is not None:
        return os.path.join(CUDA_HOME, *paths)
    raise EnvironmentError('CUDA_HOME environment variable is not set. '
                           'Please set it to your CUDA install root.')
| |
| |
| def _is_cuda_file(path): |
| return os.path.splitext(path)[1] in ['.cu', '.cuh'] |