| from __future__ import absolute_import, division, print_function, unicode_literals |
| import copy |
| import glob |
| import imp |
| import os |
| import re |
| import shlex |
| import setuptools |
| import subprocess |
| import sys |
| import sysconfig |
| import warnings |
| import collections |
| |
| import torch |
| import torch._appdirs |
| from .file_baton import FileBaton |
| from ._cpp_extension_versioner import ExtensionVersioner |
| from .hipify import hipify_python |
| from .hipify.hipify_python import get_hip_file_path, GeneratedFileCleaner |
| |
| from setuptools.command.build_ext import build_ext |
| |
| |
| IS_WINDOWS = sys.platform == 'win32' |
| |
| def _find_cuda_home(): |
| r'''Finds the CUDA install path.''' |
| # Guess #1 |
| cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') |
| if cuda_home is None: |
| # Guess #2 |
| try: |
| which = 'where' if IS_WINDOWS else 'which' |
| with open(os.devnull, 'w') as devnull: |
| nvcc = subprocess.check_output([which, 'nvcc'], |
| stderr=devnull).decode().rstrip('\r\n') |
| cuda_home = os.path.dirname(os.path.dirname(nvcc)) |
| except Exception: |
| # Guess #3 |
| if IS_WINDOWS: |
| cuda_homes = glob.glob( |
| 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') |
| if len(cuda_homes) == 0: |
| cuda_home = '' |
| else: |
| cuda_home = cuda_homes[0] |
| else: |
| cuda_home = '/usr/local/cuda' |
| if not os.path.exists(cuda_home): |
| cuda_home = None |
| if cuda_home and not torch.cuda.is_available(): |
| print("No CUDA runtime is found, using CUDA_HOME='{}'".format(cuda_home)) |
| return cuda_home |
| |
| def _find_rocm_home(): |
| r'''Finds the ROCm install path.''' |
| # Guess #1 |
| rocm_home = os.environ.get('ROCM_HOME') or os.environ.get('ROCM_PATH') |
| if rocm_home is None: |
| # Guess #2 |
| try: |
| hipcc = subprocess.check_output( |
| ['which', 'hipcc'], stderr=subprocess.DEVNULL).decode().rstrip('\r\n') |
| # this will be either <ROCM_HOME>/hip/bin/hipcc or <ROCM_HOME>/bin/hipcc |
| rocm_home = os.path.dirname(os.path.dirname(hipcc)) |
| if os.path.basename(rocm_home) == 'hip': |
| rocm_home = os.path.dirname(rocm_home) |
| except Exception: |
| # Guess #3 |
| rocm_home = '/opt/rocm' |
| if not os.path.exists(rocm_home): |
| rocm_home = None |
| if rocm_home and torch.version.hip is None: |
| print("No ROCm runtime is found, using ROCM_HOME='{}'".format(rocm_home)) |
| return rocm_home |
| |
| |
| def _join_rocm_home(*paths): |
| r''' |
| Joins paths with ROCM_HOME, or raises an error if ROCM_HOME is not set. |
| |
| This is basically a lazy way of raising an error for missing $ROCM_HOME |
| only once we need to get any ROCm-specific path. |
| ''' |
| if ROCM_HOME is None: |
| raise EnvironmentError('ROCM_HOME environment variable is not set. ' |
| 'Please set it to your ROCm install root.') |
| elif IS_WINDOWS: |
| raise EnvironmentError('Building PyTorch extensions using ' |
| 'ROCm and Windows is not supported.') |
| return os.path.join(ROCM_HOME, *paths) |
| |
| |
| MINIMUM_GCC_VERSION = (5, 0, 0) |
| MINIMUM_MSVC_VERSION = (19, 0, 24215) |
| ABI_INCOMPATIBILITY_WARNING = ''' |
| |
| !! WARNING !! |
| |
| !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| Your compiler ({}) may be ABI-incompatible with PyTorch! |
| Please use a compiler that is ABI-compatible with GCC 5.0 and above. |
| See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html. |
| |
| See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6 |
| for instructions on how to install GCC 5 or higher. |
| !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| |
| !! WARNING !! |
| ''' |
| WRONG_COMPILER_WARNING = ''' |
| |
| !! WARNING !! |
| |
| !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| Your compiler ({user_compiler}) is not compatible with the compiler PyTorch was |
| built with for this platform, which is {pytorch_compiler} on {platform}. Please |
| use {pytorch_compiler} to compile your extension. Alternatively, you may |
| compile PyTorch from source using {user_compiler}, and then you can also use |
| {user_compiler} to compile your extension. |
| |
| See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help |
| with compiling PyTorch from source. |
| !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| |
| !! WARNING !! |
| ''' |
| ROCM_HOME = _find_rocm_home() |
| MIOPEN_HOME = _join_rocm_home('miopen') if ROCM_HOME else None |
| IS_HIP_EXTENSION = ROCM_HOME is not None and torch.version.hip is not None |
| CUDA_HOME = _find_cuda_home() |
| CUDNN_HOME = os.environ.get('CUDNN_HOME') or os.environ.get('CUDNN_PATH') |
| # PyTorch releases have the version pattern major.minor.patch, whereas when |
| # PyTorch is built from source, we append the git commit hash, which gives |
| # it the below pattern. |
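| # e.g. '1.6.0a0+9aa3fca' (built from source) vs. '1.6.0' (release binary). |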
| BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+') |
| |
| COMMON_MSVC_FLAGS = ['/MD', '/wd4819', '/wd4251', '/wd4244', '/wd4267', '/wd4275', '/wd4018', '/wd4190', '/EHsc'] |
| |
| MSVC_IGNORE_CUDAFE_WARNINGS = [ |
| 'base_class_has_different_dll_interface', |
| 'field_without_dll_interface', |
| 'dll_interface_conflict_none_assumed', |
| 'dll_interface_conflict_dllexport_assumed' |
| ] |
| |
| COMMON_NVCC_FLAGS = [ |
| '-D__CUDA_NO_HALF_OPERATORS__', |
| '-D__CUDA_NO_HALF_CONVERSIONS__', |
| '-D__CUDA_NO_HALF2_OPERATORS__', |
| '--expt-relaxed-constexpr' |
| ] |
| |
| COMMON_HIPCC_FLAGS = [ |
| '-fPIC', |
| '-D__HIP_PLATFORM_HCC__=1', |
| '-DCUDA_HAS_FP16=1', |
| '-D__HIP_NO_HALF_OPERATORS__=1', |
| '-D__HIP_NO_HALF_CONVERSIONS__=1', |
| ] |
| |
| JIT_EXTENSION_VERSIONER = ExtensionVersioner() |
| |
| PLAT_TO_VCVARS = { |
| 'win32' : 'x86', |
| 'win-amd64' : 'x86_amd64', |
| } |
| |
| |
| def _is_binary_build(): |
| return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__) |
| |
| |
| def _accepted_compilers_for_platform(): |
| # gnu-c++ and gnu-cc are the conda gcc compilers |
| return ['clang++', 'clang'] if sys.platform.startswith('darwin') else ['g++', 'gcc', 'gnu-c++', 'gnu-cc'] |
| |
| |
| def get_default_build_root(): |
| r''' |
| Returns the path to the root folder under which extensions will be built. |
| |
| For each extension module built, there will be one folder underneath the |
| folder returned by this function. For example, if ``p`` is the path |
| returned by this function and ``ext`` the name of an extension, the build |
| folder for the extension will be ``p/ext``. |
| |
| This directory is **user-specific** so that multiple users on the same |
| machine won't run into permission issues with each other's builds. |
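| |
| Example: |
| >>> # A sketch: on Linux this is typically '~/.cache/torch_extensions', |
| >>> # but the exact location comes from torch._appdirs per platform. |
| >>> build_root = get_default_build_root() |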
| ''' |
| return os.path.realpath(torch._appdirs.user_cache_dir(appname='torch_extensions')) |
| |
| |
| def check_compiler_ok_for_platform(compiler): |
| r''' |
| Verifies that the compiler is the expected one for the current platform. |
| |
| Arguments: |
| compiler (str): The compiler executable to check. |
| |
| Returns: |
| True if the compiler is gcc/g++ on Linux or clang/clang++ on macOS, |
| and always True for Windows. |
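| |
| Example: |
| >>> # A sketch: expected to be True on a typical Linux box using g++. |
| >>> check_compiler_ok_for_platform('g++') |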
| ''' |
| if IS_WINDOWS: |
| return True |
| which = subprocess.check_output(['which', compiler], stderr=subprocess.STDOUT) |
| # Use os.path.realpath to resolve any symlinks, in particular from 'c++' to e.g. 'g++'. |
| compiler_path = os.path.realpath(which.decode().strip()) |
| # Check the compiler name |
| if any(name in compiler_path for name in _accepted_compilers_for_platform()): |
| return True |
| # A compiler wrapper may be in use (e.g. the path is /usr/bin/ccache); query the real compiler via the -v flag. |
| version_string = subprocess.check_output([compiler, '-v'], stderr=subprocess.STDOUT).decode() |
| if sys.platform.startswith('linux'): |
| # Check for 'gcc' or 'g++' |
| pattern = re.compile("^COLLECT_GCC=(.*)$", re.MULTILINE) |
| results = re.findall(pattern, version_string) |
| if len(results) != 1: |
| return False |
| compiler_path = os.path.realpath(results[0].strip()) |
| return any(name in compiler_path for name in _accepted_compilers_for_platform()) |
| if sys.platform.startswith('darwin'): |
| # Check for 'clang' or 'clang++' |
| return version_string.startswith("Apple clang") |
| return False |
| |
| |
| def check_compiler_abi_compatibility(compiler): |
| r''' |
| Verifies that the given compiler is ABI-compatible with PyTorch. |
| |
| Arguments: |
| compiler (str): The compiler executable name to check (e.g. ``g++``). |
| Must be executable in a shell process. |
| |
| Returns: |
| False if the compiler is (likely) ABI-incompatible with PyTorch, |
| else True. |
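| |
| Example: |
| >>> # A sketch: True on Linux when the installed g++ is 5.0 or newer. |
| >>> check_compiler_abi_compatibility('g++') |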
| ''' |
| if not _is_binary_build(): |
| return True |
| if os.environ.get('TORCH_DONT_CHECK_COMPILER_ABI') in ['ON', '1', 'YES', 'TRUE', 'Y']: |
| return True |
| |
| # First check if the compiler is one of the expected ones for the particular platform. |
| if not check_compiler_ok_for_platform(compiler): |
| warnings.warn(WRONG_COMPILER_WARNING.format( |
| user_compiler=compiler, |
| pytorch_compiler=_accepted_compilers_for_platform()[0], |
| platform=sys.platform)) |
| return False |
| |
| if sys.platform.startswith('darwin'): |
| # There is no particular minimum version we need for clang, so we're good here. |
| return True |
| try: |
| if sys.platform.startswith('linux'): |
| minimum_required_version = MINIMUM_GCC_VERSION |
| version = subprocess.check_output([compiler, '-dumpfullversion', '-dumpversion']) |
| version = version.decode().strip().split('.') |
| else: |
| minimum_required_version = MINIMUM_MSVC_VERSION |
| compiler_info = subprocess.check_output(compiler, stderr=subprocess.STDOUT) |
| match = re.search(r'(\d+)\.(\d+)\.(\d+)', compiler_info.decode().strip()) |
| # Keep the components as strings so that '.'.join below also works |
| # in the failure path. |
| version = ('0', '0', '0') if match is None else match.groups() |
| except Exception: |
| _, error, _ = sys.exc_info() |
| warnings.warn('Error checking compiler version for {}: {}'.format(compiler, error)) |
| return False |
| |
| if tuple(map(int, version)) >= minimum_required_version: |
| return True |
| |
| compiler = '{} {}'.format(compiler, ".".join(version)) |
| warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler)) |
| |
| return False |
| |
| |
| # See below for why we inherit BuildExtension from object. |
| # https://stackoverflow.com/questions/1713038/super-fails-with-error-typeerror-argument-1-must-be-type-not-classobj-when |
| |
| |
| class BuildExtension(build_ext, object): |
| r''' |
| A custom :mod:`setuptools` build extension. |
| |
| This :class:`setuptools.build_ext` subclass takes care of passing the |
| minimum required compiler flags (e.g. ``-std=c++14``) as well as mixed |
| C++/CUDA compilation (and support for CUDA files in general). |
| |
| When using :class:`BuildExtension`, it is allowed to supply a dictionary |
| for ``extra_compile_args`` (rather than the usual list) that maps from |
| languages (``cxx`` or ``nvcc``) to a list of additional compiler flags to |
| supply to the compiler. This makes it possible to supply different flags to |
| the C++ and CUDA compiler during mixed compilation. |
| |
| ``use_ninja`` (bool): If ``use_ninja`` is ``True`` (default), then we |
| attempt to build using the Ninja backend. Ninja greatly speeds up |
| compilation compared to the standard ``setuptools.build_ext``. |
| Falls back to the standard distutils backend if Ninja is not available. |
| |
| .. note:: |
| By default, the Ninja backend uses #CPUS + 2 workers to build the |
| extension. This may use up too many resources on some systems. One |
| can control the number of workers by setting the `MAX_JOBS` environment |
| variable to a non-negative number. |
| ''' |
| |
| @classmethod |
| def with_options(cls, **options): |
| r''' |
| Returns an alternative constructor that merges the given options into the |
| keyword arguments passed to the original constructor. |
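| |
| Example: |
| >>> # A sketch: pre-set ``use_ninja`` for every build via cmdclass. |
| >>> cmdclass = {'build_ext': BuildExtension.with_options(use_ninja=False)} |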
| ''' |
| def init_with_options(*args, **kwargs): |
| kwargs = kwargs.copy() |
| kwargs.update(options) |
| return cls(*args, **kwargs) |
| return init_with_options |
| |
| def __init__(self, *args, **kwargs): |
| super(BuildExtension, self).__init__(*args, **kwargs) |
| self.no_python_abi_suffix = kwargs.get("no_python_abi_suffix", False) |
| |
| self.use_ninja = kwargs.get('use_ninja', True) |
| if self.use_ninja: |
| # Test if we can use ninja. Fallback otherwise. |
| msg = ('Attempted to use ninja as the BuildExtension backend but ' |
| '{}. Falling back to using the slow distutils backend.') |
| if not is_ninja_available(): |
| warnings.warn(msg.format('we could not find ninja.')) |
| self.use_ninja = False |
| |
| def build_extensions(self): |
| self._check_abi() |
| for extension in self.extensions: |
| self._add_compile_flag(extension, '-DTORCH_API_INCLUDE_EXTENSION_H') |
| self._define_torch_extension_name(extension) |
| self._add_gnu_cpp_abi_flag(extension) |
| |
| # Register .cu, .cuh and .hip as valid source extensions. |
| self.compiler.src_extensions += ['.cu', '.cuh', '.hip'] |
| # Save the original compile method (and spawn, on MSVC) for later. |
| if self.compiler.compiler_type == 'msvc': |
| self.compiler._cpp_extensions += ['.cu', '.cuh'] |
| original_compile = self.compiler.compile |
| original_spawn = self.compiler.spawn |
| else: |
| original_compile = self.compiler._compile |
| |
| def append_std14_if_no_std_present(cflags): |
| # NVCC does not allow multiple -std to be passed, so we avoid |
| # overriding the option if the user explicitly passed it. |
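| # E.g. this appends '-std=c++14' for gcc/clang/nvcc and '/std:c++14' for MSVC. |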
| cpp_format_prefix = '/{}:' if self.compiler.compiler_type == 'msvc' else '-{}=' |
| cpp_flag_prefix = cpp_format_prefix.format('std') |
| cpp_flag = cpp_flag_prefix + 'c++14' |
| if not any(flag.startswith(cpp_flag_prefix) for flag in cflags): |
| cflags.append(cpp_flag) |
| |
| def unix_cuda_flags(cflags): |
| return (COMMON_NVCC_FLAGS + |
| ['--compiler-options', "'-fPIC'"] + |
| cflags + _get_cuda_arch_flags(cflags)) |
| |
| def convert_to_absolute_paths_inplace(paths): |
| # Helper function. See Note [Absolute include_dirs] |
| if paths is not None: |
| for i in range(len(paths)): |
| if not os.path.isabs(paths[i]): |
| paths[i] = os.path.abspath(paths[i]) |
| |
| def unix_wrap_single_compile(obj, src, ext, cc_args, extra_postargs, pp_opts): |
| # Copy before we make any modifications. |
| cflags = copy.deepcopy(extra_postargs) |
| try: |
| original_compiler = self.compiler.compiler_so |
| if _is_cuda_file(src): |
| nvcc = (_join_rocm_home('bin', 'hipcc') if IS_HIP_EXTENSION else _join_cuda_home('bin', 'nvcc')) |
| if not isinstance(nvcc, list): |
| nvcc = [nvcc] |
| self.compiler.set_executable('compiler_so', nvcc) |
| if isinstance(cflags, dict): |
| cflags = cflags['nvcc'] |
| if IS_HIP_EXTENSION: |
| cflags = cflags + _get_rocm_arch_flags(cflags) |
| else: |
| cflags = unix_cuda_flags(cflags) |
| elif isinstance(cflags, dict): |
| cflags = cflags['cxx'] |
| if IS_HIP_EXTENSION: |
| cflags = cflags + COMMON_HIPCC_FLAGS |
| append_std14_if_no_std_present(cflags) |
| |
| original_compile(obj, src, ext, cc_args, cflags, pp_opts) |
| finally: |
| # Put the original compiler back in place. |
| self.compiler.set_executable('compiler_so', original_compiler) |
| |
| def unix_wrap_ninja_compile(sources, |
| output_dir=None, |
| macros=None, |
| include_dirs=None, |
| debug=0, |
| extra_preargs=None, |
| extra_postargs=None, |
| depends=None): |
| r"""Compiles sources by outputting a ninja file and running it.""" |
| # NB: I copied some lines from self.compiler (which is an instance |
| # of distutils.UnixCCompiler). See the following link. |
| # https://github.com/python/cpython/blob/f03a8f8d5001963ad5b5b28dbd95497e9cc15596/Lib/distutils/ccompiler.py#L564-L567 |
| # This can be fragile, but a lot of other repos also do this |
| # (see https://github.com/search?q=_setup_compile&type=Code) |
| # so it is probably OK; we'll also get CI signal if/when |
| # we update our python version (which is when distutils can be |
| # upgraded) |
| |
| # Use absolute path for output_dir so that the object file paths |
| # (`objects`) get generated with absolute paths. |
| output_dir = os.path.abspath(output_dir) |
| |
| # See Note [Absolute include_dirs] |
| convert_to_absolute_paths_inplace(self.compiler.include_dirs) |
| |
| _, objects, extra_postargs, pp_opts, _ = \ |
| self.compiler._setup_compile(output_dir, macros, |
| include_dirs, sources, |
| depends, extra_postargs) |
| common_cflags = self.compiler._get_cc_args(pp_opts, debug, extra_preargs) |
| extra_cc_cflags = self.compiler.compiler_so[1:] |
| with_cuda = any(map(_is_cuda_file, sources)) |
| |
| # extra_postargs can be either: |
| # - a dict mapping cxx/nvcc to extra flags |
| # - a list of extra flags. |
| if isinstance(extra_postargs, dict): |
| post_cflags = extra_postargs['cxx'] |
| else: |
| post_cflags = list(extra_postargs) |
| if IS_HIP_EXTENSION: |
| post_cflags += COMMON_HIPCC_FLAGS |
| append_std14_if_no_std_present(post_cflags) |
| |
| cuda_post_cflags = None |
| cuda_cflags = None |
| if with_cuda: |
| cuda_cflags = common_cflags |
| if isinstance(extra_postargs, dict): |
| cuda_post_cflags = extra_postargs['nvcc'] |
| else: |
| cuda_post_cflags = list(extra_postargs) |
| if IS_HIP_EXTENSION: |
| cuda_post_cflags = cuda_post_cflags + _get_rocm_arch_flags(cuda_post_cflags) |
| cuda_post_cflags = cuda_post_cflags + COMMON_HIPCC_FLAGS |
| else: |
| cuda_post_cflags = unix_cuda_flags(cuda_post_cflags) |
| append_std14_if_no_std_present(cuda_post_cflags) |
| cuda_cflags = [shlex.quote(f) for f in cuda_cflags] |
| cuda_post_cflags = [shlex.quote(f) for f in cuda_post_cflags] |
| |
| _write_ninja_file_and_compile_objects( |
| sources=sources, |
| objects=objects, |
| cflags=[shlex.quote(f) for f in extra_cc_cflags + common_cflags], |
| post_cflags=[shlex.quote(f) for f in post_cflags], |
| cuda_cflags=cuda_cflags, |
| cuda_post_cflags=cuda_post_cflags, |
| build_directory=output_dir, |
| verbose=True, |
| with_cuda=with_cuda) |
| |
| # Return *all* object filenames, not just the ones we just built. |
| return objects |
| |
| def win_cuda_flags(cflags): |
| return (COMMON_NVCC_FLAGS + |
| cflags + _get_cuda_arch_flags(cflags)) |
| |
| def win_wrap_single_compile(sources, |
| output_dir=None, |
| macros=None, |
| include_dirs=None, |
| debug=0, |
| extra_preargs=None, |
| extra_postargs=None, |
| depends=None): |
| |
| self.cflags = copy.deepcopy(extra_postargs) |
| extra_postargs = None |
| |
| def spawn(cmd): |
| # Using regex to match src, obj and include files |
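| # E.g. '/Tpfoo.cpp' is a C++ source, '/Fofoo.obj' an object file, and '/Ipath' or '-Ipath' an include dir. |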
| src_regex = re.compile('/T(p|c)(.*)') |
| src_list = [ |
| m.group(2) for m in (src_regex.match(elem) for elem in cmd) |
| if m |
| ] |
| |
| obj_regex = re.compile('/Fo(.*)') |
| obj_list = [ |
| m.group(1) for m in (obj_regex.match(elem) for elem in cmd) |
| if m |
| ] |
| |
| include_regex = re.compile(r'((\-|\/)I.*)') |
| include_list = [ |
| m.group(1) |
| for m in (include_regex.match(elem) for elem in cmd) if m |
| ] |
| |
| if len(src_list) >= 1 and len(obj_list) >= 1: |
| src = src_list[0] |
| obj = obj_list[0] |
| if _is_cuda_file(src): |
| nvcc = _join_cuda_home('bin', 'nvcc') |
| if isinstance(self.cflags, dict): |
| cflags = self.cflags['nvcc'] |
| elif isinstance(self.cflags, list): |
| cflags = self.cflags |
| else: |
| cflags = [] |
| |
| cflags = win_cuda_flags(cflags) |
| for flag in COMMON_MSVC_FLAGS: |
| cflags = ['-Xcompiler', flag] + cflags |
| for ignore_warning in MSVC_IGNORE_CUDAFE_WARNINGS: |
| cflags = ['-Xcudafe', '--diag_suppress=' + ignore_warning] + cflags |
| cmd = [nvcc, '-c', src, '-o', obj] + include_list + cflags |
| elif isinstance(self.cflags, dict): |
| cflags = COMMON_MSVC_FLAGS + self.cflags['cxx'] |
| cmd += cflags |
| elif isinstance(self.cflags, list): |
| cflags = COMMON_MSVC_FLAGS + self.cflags |
| cmd += cflags |
| |
| return original_spawn(cmd) |
| |
| try: |
| self.compiler.spawn = spawn |
| return original_compile(sources, output_dir, macros, |
| include_dirs, debug, extra_preargs, |
| extra_postargs, depends) |
| finally: |
| self.compiler.spawn = original_spawn |
| |
| def win_wrap_ninja_compile(sources, |
| output_dir=None, |
| macros=None, |
| include_dirs=None, |
| debug=0, |
| extra_preargs=None, |
| extra_postargs=None, |
| depends=None): |
| |
| if not self.compiler.initialized: |
| self.compiler.initialize() |
| output_dir = os.path.abspath(output_dir) |
| |
| # Note [Absolute include_dirs] |
| # Convert any relative paths in self.compiler.include_dirs to absolute paths. |
| # For a ninja build, compilation does not happen in the local directory but |
| # in a build folder created by this script, so relative include paths would |
| # no longer resolve correctly there. To be consistent with JIT extensions, |
| # we allow users to pass relative include_dirs to setuptools.setup and |
| # convert them to absolute paths here. |
| convert_to_absolute_paths_inplace(self.compiler.include_dirs) |
| |
| _, objects, extra_postargs, pp_opts, _ = \ |
| self.compiler._setup_compile(output_dir, macros, |
| include_dirs, sources, |
| depends, extra_postargs) |
| common_cflags = extra_preargs or [] |
| cflags = [] |
| if debug: |
| cflags.extend(self.compiler.compile_options_debug) |
| else: |
| cflags.extend(self.compiler.compile_options) |
| common_cflags.extend(COMMON_MSVC_FLAGS) |
| cflags = cflags + common_cflags + pp_opts |
| with_cuda = any(map(_is_cuda_file, sources)) |
| |
| # extra_postargs can be either: |
| # - a dict mapping cxx/nvcc to extra flags |
| # - a list of extra flags. |
| if isinstance(extra_postargs, dict): |
| post_cflags = extra_postargs['cxx'] |
| else: |
| post_cflags = list(extra_postargs) |
| append_std14_if_no_std_present(post_cflags) |
| |
| cuda_post_cflags = None |
| cuda_cflags = None |
| if with_cuda: |
| cuda_cflags = [] |
| for common_cflag in common_cflags: |
| cuda_cflags.append('-Xcompiler') |
| cuda_cflags.append(common_cflag) |
| for ignore_warning in MSVC_IGNORE_CUDAFE_WARNINGS: |
| cuda_cflags.append('-Xcudafe') |
| cuda_cflags.append('--diag_suppress=' + ignore_warning) |
| cuda_cflags.extend(pp_opts) |
| if isinstance(extra_postargs, dict): |
| cuda_post_cflags = extra_postargs['nvcc'] |
| else: |
| cuda_post_cflags = list(extra_postargs) |
| cuda_post_cflags = win_cuda_flags(cuda_post_cflags) |
| |
| from distutils.spawn import _nt_quote_args |
| cflags = _nt_quote_args(cflags) |
| post_cflags = _nt_quote_args(post_cflags) |
| if with_cuda: |
| cuda_cflags = _nt_quote_args(cuda_cflags) |
| cuda_post_cflags = _nt_quote_args(cuda_post_cflags) |
| |
| _write_ninja_file_and_compile_objects( |
| sources=sources, |
| objects=objects, |
| cflags=cflags, |
| post_cflags=post_cflags, |
| cuda_cflags=cuda_cflags, |
| cuda_post_cflags=cuda_post_cflags, |
| build_directory=output_dir, |
| verbose=True, |
| with_cuda=with_cuda) |
| |
| # Return *all* object filenames, not just the ones we just built. |
| return objects |
| |
| # Monkey-patch the _compile or compile method. |
| # https://github.com/python/cpython/blob/dc0284ee8f7a270b6005467f26d8e5773d76e959/Lib/distutils/ccompiler.py#L511 |
| if self.compiler.compiler_type == 'msvc': |
| if self.use_ninja: |
| self.compiler.compile = win_wrap_ninja_compile |
| else: |
| self.compiler.compile = win_wrap_single_compile |
| else: |
| if self.use_ninja: |
| self.compiler.compile = unix_wrap_ninja_compile |
| else: |
| self.compiler._compile = unix_wrap_single_compile |
| |
| build_ext.build_extensions(self) |
| |
| def get_ext_filename(self, ext_name): |
| # Get the original shared library name. For Python 3, this name will be |
| # suffixed with "<SOABI>.so", where <SOABI> will be something like |
| # cpython-37m-x86_64-linux-gnu. |
| ext_filename = super(BuildExtension, self).get_ext_filename(ext_name) |
| # If `no_python_abi_suffix` is `True`, we omit the Python 3 ABI |
| # component. This makes building shared libraries with setuptools that |
| # aren't Python modules nicer. |
| if self.no_python_abi_suffix: |
| # The parts will be e.g. ["my_extension", "cpython-37m-x86_64-linux-gnu", "so"]. |
| ext_filename_parts = ext_filename.split('.') |
| # Omit the second to last element. |
| without_abi = ext_filename_parts[:-2] + ext_filename_parts[-1:] |
| ext_filename = '.'.join(without_abi) |
| return ext_filename |
| |
| def _check_abi(self): |
| # On some platforms, like Windows, compiler_cxx is not available. |
| if hasattr(self.compiler, 'compiler_cxx'): |
| compiler = self.compiler.compiler_cxx[0] |
| elif IS_WINDOWS: |
| compiler = os.environ.get('CXX', 'cl') |
| else: |
| compiler = os.environ.get('CXX', 'c++') |
| check_compiler_abi_compatibility(compiler) |
| # Warn user if VC env is activated but `DISTUTILS_USE_SDK` is not set. |
| if IS_WINDOWS and 'VSCMD_ARG_TGT_ARCH' in os.environ and 'DISTUTILS_USE_SDK' not in os.environ: |
| msg = ('It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set. ' |
| 'This may lead to multiple activations of the VC env. ' |
| 'Please set `DISTUTILS_USE_SDK=1` and try again.') |
| raise UserWarning(msg) |
| |
| def _add_compile_flag(self, extension, flag): |
| extension.extra_compile_args = copy.deepcopy(extension.extra_compile_args) |
| if isinstance(extension.extra_compile_args, dict): |
| for args in extension.extra_compile_args.values(): |
| args.append(flag) |
| else: |
| extension.extra_compile_args.append(flag) |
| |
| def _define_torch_extension_name(self, extension): |
| # pybind11 doesn't support dots in module names, so to support |
| # extensions that live inside packages (like torch._C), we take the |
| # last part of the dotted name as the library name. |
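| # E.g. an extension named 'mypkg.my_ext' gets -DTORCH_EXTENSION_NAME=my_ext. |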
| names = extension.name.split('.') |
| name = names[-1] |
| define = '-DTORCH_EXTENSION_NAME={}'.format(name) |
| self._add_compile_flag(extension, define) |
| |
| def _add_gnu_cpp_abi_flag(self, extension): |
| # use the same CXX ABI as what PyTorch was compiled with |
| self._add_compile_flag(extension, '-D_GLIBCXX_USE_CXX11_ABI=' + str(int(torch._C._GLIBCXX_USE_CXX11_ABI))) |
| |
| |
| def CppExtension(name, sources, *args, **kwargs): |
| r''' |
| Creates a :class:`setuptools.Extension` for C++. |
| |
| Convenience method that creates a :class:`setuptools.Extension` with the |
| bare minimum (but often sufficient) arguments to build a C++ extension. |
| |
| All arguments are forwarded to the :class:`setuptools.Extension` |
| constructor. |
| |
| Example: |
| >>> from setuptools import setup |
| >>> from torch.utils.cpp_extension import BuildExtension, CppExtension |
| >>> setup( |
| name='extension', |
| ext_modules=[ |
| CppExtension( |
| name='extension', |
| sources=['extension.cpp'], |
| extra_compile_args=['-g']), |
| ], |
| cmdclass={ |
| 'build_ext': BuildExtension |
| }) |
| ''' |
| include_dirs = kwargs.get('include_dirs', []) |
| include_dirs += include_paths() |
| kwargs['include_dirs'] = include_dirs |
| |
| library_dirs = kwargs.get('library_dirs', []) |
| library_dirs += library_paths() |
| kwargs['library_dirs'] = library_dirs |
| |
| libraries = kwargs.get('libraries', []) |
| libraries.append('c10') |
| libraries.append('torch') |
| libraries.append('torch_cpu') |
| libraries.append('torch_python') |
| kwargs['libraries'] = libraries |
| |
| kwargs['language'] = 'c++' |
| return setuptools.Extension(name, sources, *args, **kwargs) |
| |
| |
| def CUDAExtension(name, sources, *args, **kwargs): |
| r''' |
| Creates a :class:`setuptools.Extension` for CUDA/C++. |
| |
| Convenience method that creates a :class:`setuptools.Extension` with the |
| bare minimum (but often sufficient) arguments to build a CUDA/C++ |
| extension. This includes the CUDA include path, library path and runtime |
| library. |
| |
| All arguments are forwarded to the :class:`setuptools.Extension` |
| constructor. |
| |
| Example: |
| >>> from setuptools import setup |
| >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension |
| >>> setup( |
| name='cuda_extension', |
| ext_modules=[ |
| CUDAExtension( |
| name='cuda_extension', |
| sources=['extension.cpp', 'extension_kernel.cu'], |
| extra_compile_args={'cxx': ['-g'], |
| 'nvcc': ['-O2']}) |
| ], |
| cmdclass={ |
| 'build_ext': BuildExtension |
| }) |
| ''' |
| library_dirs = kwargs.get('library_dirs', []) |
| library_dirs += library_paths(cuda=True) |
| kwargs['library_dirs'] = library_dirs |
| |
| libraries = kwargs.get('libraries', []) |
| libraries.append('c10') |
| libraries.append('torch') |
| libraries.append('torch_cpu') |
| libraries.append('torch_python') |
| if IS_HIP_EXTENSION: |
| libraries.append('c10_hip') |
| libraries.append('torch_hip') |
| else: |
| libraries.append('cudart') |
| libraries.append('c10_cuda') |
| libraries.append('torch_cuda') |
| kwargs['libraries'] = libraries |
| |
| include_dirs = kwargs.get('include_dirs', []) |
| include_dirs += include_paths(cuda=True) |
| kwargs['include_dirs'] = include_dirs |
| |
| kwargs['language'] = 'c++' |
| |
| return setuptools.Extension(name, sources, *args, **kwargs) |
| |
| |
| def include_paths(cuda=False): |
| ''' |
| Get the include paths required to build a C++ or CUDA extension. |
| |
| Args: |
| cuda: If `True`, includes CUDA-specific include paths. |
| |
| Returns: |
| A list of include path strings. |
| ''' |
| here = os.path.abspath(__file__) |
| torch_path = os.path.dirname(os.path.dirname(here)) |
| lib_include = os.path.join(torch_path, 'include') |
| paths = [ |
| lib_include, |
| # Remove this once torch/torch.h is officially no longer supported for C++ extensions. |
| os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), |
| # Some internal (old) Torch headers don't properly prefix their includes, |
| # so we need to pass -Itorch/lib/include/TH as well. |
| os.path.join(lib_include, 'TH'), |
| os.path.join(lib_include, 'THC') |
| ] |
| if cuda and IS_HIP_EXTENSION: |
| paths.append(os.path.join(lib_include, 'THH')) |
| paths.append(_join_rocm_home('include')) |
| if MIOPEN_HOME is not None: |
| paths.append(os.path.join(MIOPEN_HOME, 'include')) |
| elif cuda: |
| cuda_home_include = _join_cuda_home('include') |
| # If we have the Debian/Ubuntu packages for CUDA, CUDA_HOME is /usr, |
| # but gcc doesn't like having /usr/include passed explicitly. |
| if cuda_home_include != '/usr/include': |
| paths.append(cuda_home_include) |
| if CUDNN_HOME is not None: |
| paths.append(os.path.join(CUDNN_HOME, 'include')) |
| return paths |
| |
| |
| def library_paths(cuda=False): |
| r''' |
| Get the library paths required to build a C++ or CUDA extension. |
| |
| Args: |
| cuda: If `True`, includes CUDA-specific library paths. |
| |
| Returns: |
| A list of library path strings. |
| ''' |
| paths = [] |
| |
| # We need to link against libtorch.so |
| here = os.path.abspath(__file__) |
| torch_path = os.path.dirname(os.path.dirname(here)) |
| lib_path = os.path.join(torch_path, 'lib') |
| paths.append(lib_path) |
| |
| if cuda and IS_HIP_EXTENSION: |
| lib_dir = 'lib' |
| paths.append(_join_rocm_home(lib_dir)) |
| elif cuda: |
| if IS_WINDOWS: |
| lib_dir = 'lib/x64' |
| else: |
| lib_dir = 'lib64' |
| if (not os.path.exists(_join_cuda_home(lib_dir)) and |
| os.path.exists(_join_cuda_home('lib'))): |
| # 64-bit CUDA may be installed in 'lib' (see e.g. gh-16955) |
| # Note that it's also possible both don't exist (see |
| # _find_cuda_home) - in that case we stay with 'lib64'. |
| lib_dir = 'lib' |
| |
| paths.append(_join_cuda_home(lib_dir)) |
| if CUDNN_HOME is not None: |
| paths.append(os.path.join(CUDNN_HOME, lib_dir)) |
| return paths |
| |
| |
| def load(name, |
| sources, |
| extra_cflags=None, |
| extra_cuda_cflags=None, |
| extra_ldflags=None, |
| extra_include_paths=None, |
| build_directory=None, |
| verbose=False, |
| with_cuda=None, |
| is_python_module=True, |
| keep_intermediates=True): |
| r''' |
| Loads a PyTorch C++ extension just-in-time (JIT). |
| |
| To load an extension, a Ninja build file is emitted, which is used to |
| compile the given sources into a dynamic library. This library is |
| subsequently loaded into the current Python process as a module and |
| returned from this function, ready for use. |
| |
| By default, the directory to which the build file is emitted and the |
| resulting library compiled to is ``<tmp>/torch_extensions/<name>``, where |
| ``<tmp>`` is the temporary folder on the current platform and ``<name>`` |
| the name of the extension. This location can be overridden in two ways. |
| First, if the ``TORCH_EXTENSIONS_DIR`` environment variable is set, it |
| replaces ``<tmp>/torch_extensions`` and all extensions will be compiled |
| into subfolders of this directory. Second, if the ``build_directory`` |
| argument to this function is supplied, it overrides the entire path, i.e. |
| the library will be compiled into that folder directly. |
| |
| To compile the sources, the default system compiler (``c++``) is used, |
| which can be overridden by setting the ``CXX`` environment variable. To pass |
| additional arguments to the compilation process, ``extra_cflags`` or |
| ``extra_ldflags`` can be provided. For example, to compile your extension |
| with optimizations, pass ``extra_cflags=['-O3']``. You can also use |
| ``extra_cflags`` to pass further include directories. |
| |
| CUDA support with mixed compilation is provided. Simply pass CUDA source |
| files (``.cu`` or ``.cuh``) along with other sources. Such files will be |
| detected and compiled with nvcc rather than the C++ compiler. This includes |
| passing the CUDA lib64 directory as a library directory, and linking |
| ``cudart``. You can pass additional flags to nvcc via |
| ``extra_cuda_cflags``, just like with ``extra_cflags`` for C++. Various |
| heuristics for finding the CUDA install directory are used, which usually |
| work fine. If not, setting the ``CUDA_HOME`` environment variable is the |
| safest option. |
| |
| Args: |
| name: The name of the extension to build. This MUST be the same as the |
| name of the pybind11 module! |
| sources: A list of relative or absolute paths to C++ source files. |
| extra_cflags: optional list of compiler flags to forward to the build. |
| extra_cuda_cflags: optional list of compiler flags to forward to nvcc |
| when building CUDA sources. |
| extra_ldflags: optional list of linker flags to forward to the build. |
| extra_include_paths: optional list of include directories to forward |
| to the build. |
| build_directory: optional path to use as build workspace. |
| verbose: If ``True``, turns on verbose logging of load steps. |
| with_cuda: Determines whether CUDA headers and libraries are added to |
| the build. If set to ``None`` (default), this value is |
| automatically determined based on the existence of ``.cu`` or |
| ``.cuh`` in ``sources``. Set it to ``True`` to force CUDA headers |
| and libraries to be included. |
| is_python_module: If ``True`` (default), imports the produced shared |
| library as a Python module. If ``False``, loads it into the process |
| as a plain dynamic library. |
| |
| Returns: |
| If ``is_python_module`` is ``True``, returns the loaded PyTorch |
| extension as a Python module. If ``is_python_module`` is ``False`` |
| returns nothing (the shared library is loaded into the process as a side |
| effect). |
| |
| Example: |
| >>> from torch.utils.cpp_extension import load |
| >>> module = load( |
| name='extension', |
| sources=['extension.cpp', 'extension_kernel.cu'], |
| extra_cflags=['-O2'], |
| verbose=True) |
| ''' |
| return _jit_compile( |
| name, |
| [sources] if isinstance(sources, str) else sources, |
| extra_cflags, |
| extra_cuda_cflags, |
| extra_ldflags, |
| extra_include_paths, |
| build_directory or _get_build_directory(name, verbose), |
| verbose, |
| with_cuda, |
| is_python_module, |
| keep_intermediates=keep_intermediates) |
| |
| |
| def load_inline(name, |
| cpp_sources, |
| cuda_sources=None, |
| functions=None, |
| extra_cflags=None, |
| extra_cuda_cflags=None, |
| extra_ldflags=None, |
| extra_include_paths=None, |
| build_directory=None, |
| verbose=False, |
| with_cuda=None, |
| is_python_module=True, |
| with_pytorch_error_handling=True, |
| keep_intermediates=True): |
| r''' |
| Loads a PyTorch C++ extension just-in-time (JIT) from string sources. |
| |
| This function behaves exactly like :func:`load`, but takes its sources as |
| strings rather than filenames. These strings are stored to files in the |
| build directory, after which the behavior of :func:`load_inline` is |
| identical to :func:`load`. |
| |
| See `the |
| tests <https://github.com/pytorch/pytorch/blob/master/test/test_cpp_extensions.py>`_ |
| for good examples of using this function. |
| |
| Sources may omit two required parts of a typical non-inline C++ extension: |
| the necessary header includes, as well as the (pybind11) binding code. More |
| precisely, strings passed to ``cpp_sources`` are first concatenated into a |
| single ``.cpp`` file. This file is then prepended with ``#include |
| <torch/extension.h>``. |
| |
| Furthermore, if the ``functions`` argument is supplied, bindings will be |
| automatically generated for each function specified. ``functions`` can |
| either be a list of function names, or a dictionary mapping from function |
| names to docstrings. If a list is given, the name of each function is used |
| as its docstring. |
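| For example, ``functions={'sin_add': 'Adds the sines of two tensors.'}`` |
| generates a binding for ``sin_add`` with that docstring. |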
| |
| The sources in ``cuda_sources`` are concatenated into a separate ``.cu`` |
| file and prepended with ``torch/types.h``, ``cuda.h`` and |
| ``cuda_runtime.h`` includes. The ``.cpp`` and ``.cu`` files are compiled |
| separately, but ultimately linked into a single library. Note that no |
| bindings are generated for functions in ``cuda_sources`` per se. To bind |
| to a CUDA kernel, you must create a C++ function that calls it, and either |
| declare or define this C++ function in one of the ``cpp_sources`` (and |
| include its name in ``functions``). |
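| |
| For example, a sketch (where ``cuda_source`` is a string that defines both |
| the kernel and a ``sin_add_cuda`` launcher function): |
| |
| >>> cpp_source = 'at::Tensor sin_add_cuda(at::Tensor x, at::Tensor y);' |
| >>> module = load_inline(name='inline_cuda_extension', |
| cpp_sources=[cpp_source], |
| cuda_sources=[cuda_source], |
| functions=['sin_add_cuda']) |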
| |
| See :func:`load` for a description of arguments omitted below. |
| |
| Args: |
| cpp_sources: A string, or list of strings, containing C++ source code. |
| cuda_sources: A string, or list of strings, containing CUDA source code. |
| functions: A list of function names for which to generate function |
| bindings. If a dictionary is given, it should map function names to |
| docstrings (which are otherwise just the function names). |
| with_cuda: Determines whether CUDA headers and libraries are added to |
| the build. If set to ``None`` (default), this value is |
| automatically determined based on whether ``cuda_sources`` is |
| provided. Set it to ``True`` to force CUDA headers |
| and libraries to be included. |
| with_pytorch_error_handling: Determines whether PyTorch error and |
| warning macros are handled by PyTorch instead of pybind11. To do |
| this, each function ``foo`` is called via an intermediary ``_safe_foo`` |
| function. This redirection might cause issues in obscure cases of |
| C++. This flag should be set to ``False`` when the redirect |
| causes issues. |
| |
| Example: |
| >>> from torch.utils.cpp_extension import load_inline |
| >>> source = \'\'\' |
| at::Tensor sin_add(at::Tensor x, at::Tensor y) { |
| return x.sin() + y.sin(); |
| } |
| \'\'\' |
| >>> module = load_inline(name='inline_extension', |
| cpp_sources=[source], |
| functions=['sin_add']) |
| |
| .. note:: |
| By default, the Ninja backend uses #CPUS + 2 workers to build the |
| extension. This may use up too many resources on some systems. One |
| can control the number of workers by setting the `MAX_JOBS` environment |
| variable to a non-negative number. |
| ''' |
| build_directory = build_directory or _get_build_directory(name, verbose) |
| |
| if isinstance(cpp_sources, str): |
| cpp_sources = [cpp_sources] |
| cuda_sources = cuda_sources or [] |
| if isinstance(cuda_sources, str): |
| cuda_sources = [cuda_sources] |
| |
| cpp_sources.insert(0, '#include <torch/extension.h>') |
| |
| # If `functions` is supplied, we create the pybind11 bindings for the user. |
| # Here, `functions` is (or becomes, after some processing) a map from |
| # function names to function docstrings. |
| if functions is not None: |
| module_def = [] |
| module_def.append('PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {') |
| if isinstance(functions, str): |
| functions = [functions] |
| if isinstance(functions, list): |
| # Make the function docstring the same as the function name. |
| functions = dict((f, f) for f in functions) |
| elif not isinstance(functions, dict): |
| raise ValueError( |
| "Expected 'functions' to be a list or dict, but was {}".format( |
| type(functions))) |
| for function_name, docstring in functions.items(): |
| if with_pytorch_error_handling: |
| module_def.append( |
| 'm.def("{0}", torch::wrap_pybind_function({0}), "{1}");' |
| .format(function_name, docstring)) |
| else: |
| module_def.append('m.def("{0}", {0}, "{1}");'.format(function_name, docstring)) |
| module_def.append('}') |
| cpp_sources += module_def |
| |
| cpp_source_path = os.path.join(build_directory, 'main.cpp') |
| with open(cpp_source_path, 'w') as cpp_source_file: |
| cpp_source_file.write('\n'.join(cpp_sources)) |
| |
| sources = [cpp_source_path] |
| |
| if cuda_sources: |
| cuda_sources.insert(0, '#include <torch/types.h>') |
| cuda_sources.insert(1, '#include <cuda.h>') |
| cuda_sources.insert(2, '#include <cuda_runtime.h>') |
| |
| cuda_source_path = os.path.join(build_directory, 'cuda.cu') |
| with open(cuda_source_path, 'w') as cuda_source_file: |
| cuda_source_file.write('\n'.join(cuda_sources)) |
| |
| sources.append(cuda_source_path) |
| |
| return _jit_compile( |
| name, |
| sources, |
| extra_cflags, |
| extra_cuda_cflags, |
| extra_ldflags, |
| extra_include_paths, |
| build_directory, |
| verbose, |
| with_cuda, |
| is_python_module, |
| keep_intermediates=keep_intermediates) |
| |
| |
| def _jit_compile(name, |
| sources, |
| extra_cflags, |
| extra_cuda_cflags, |
| extra_ldflags, |
| extra_include_paths, |
| build_directory, |
| verbose, |
| with_cuda, |
| is_python_module, |
| keep_intermediates=True): |
| if with_cuda is None: |
| with_cuda = any(map(_is_cuda_file, sources)) |
| with_cudnn = any(['cudnn' in f for f in extra_ldflags or []]) |
| old_version = JIT_EXTENSION_VERSIONER.get_version(name) |
| version = JIT_EXTENSION_VERSIONER.bump_version_if_changed( |
| name, |
| sources, |
| build_arguments=[extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths], |
| build_directory=build_directory, |
| with_cuda=with_cuda |
| ) |
| if version > 0: |
| if version != old_version and verbose: |
| print('The input conditions for extension module {} have changed. '.format(name) + |
| 'Bumping to version {0} and re-building as {1}_v{0}...'.format(version, name)) |
| name = '{}_v{}'.format(name, version) |
| |
| if version != old_version: |
| baton = FileBaton(os.path.join(build_directory, 'lock')) |
| if baton.try_acquire(): |
| try: |
| with GeneratedFileCleaner(keep_intermediates=keep_intermediates) as clean_ctx: |
| if IS_HIP_EXTENSION and (with_cuda or with_cudnn): |
| hipify_python.hipify( |
| project_directory=build_directory, |
| output_directory=build_directory, |
| includes=os.path.join(build_directory, '*'), |
| extra_files=[os.path.abspath(s) for s in sources], |
| show_detailed=verbose, |
| is_pytorch_extension=True, |
| clean_ctx=clean_ctx |
| ) |
| _write_ninja_file_and_build_library( |
| name=name, |
| sources=sources, |
| extra_cflags=extra_cflags or [], |
| extra_cuda_cflags=extra_cuda_cflags or [], |
| extra_ldflags=extra_ldflags or [], |
| extra_include_paths=extra_include_paths or [], |
| build_directory=build_directory, |
| verbose=verbose, |
| with_cuda=with_cuda) |
| finally: |
| baton.release() |
| else: |
| baton.wait() |
| elif verbose: |
| print('No modifications detected for re-loaded extension ' |
| 'module {}, skipping build step...'.format(name)) |
| |
| if verbose: |
| print('Loading extension module {}...'.format(name)) |
| return _import_module_from_library(name, build_directory, is_python_module) |
| |
| |
| def _write_ninja_file_and_compile_objects( |
| sources, |
| objects, |
| cflags, |
| post_cflags, |
| cuda_cflags, |
| cuda_post_cflags, |
| build_directory, |
| verbose, |
| with_cuda): |
| verify_ninja_availability() |
| if IS_WINDOWS: |
| compiler = os.environ.get('CXX', 'cl') |
| else: |
| compiler = os.environ.get('CXX', 'c++') |
| check_compiler_abi_compatibility(compiler) |
| if with_cuda is None: |
| with_cuda = any(map(_is_cuda_file, sources)) |
| build_file_path = os.path.join(build_directory, 'build.ninja') |
| if verbose: |
| print( |
| 'Emitting ninja build file {}...'.format(build_file_path)) |
| _write_ninja_file( |
| path=build_file_path, |
| cflags=cflags, |
| post_cflags=post_cflags, |
| cuda_cflags=cuda_cflags, |
| cuda_post_cflags=cuda_post_cflags, |
| sources=sources, |
| objects=objects, |
| ldflags=None, |
| library_target=None, |
| with_cuda=with_cuda) |
| if verbose: |
| print('Compiling objects...') |
| _run_ninja_build( |
| build_directory, |
| verbose, |
| # It would be better if we could tell users the name of the extension |
| # that failed to build but there isn't a good way to get it here. |
| error_prefix='Error compiling objects for extension') |
| |
| |
| def _write_ninja_file_and_build_library( |
| name, |
| sources, |
| extra_cflags, |
| extra_cuda_cflags, |
| extra_ldflags, |
| extra_include_paths, |
| build_directory, |
| verbose, |
| with_cuda): |
| verify_ninja_availability() |
| if IS_WINDOWS: |
| compiler = os.environ.get('CXX', 'cl') |
| else: |
| compiler = os.environ.get('CXX', 'c++') |
| check_compiler_abi_compatibility(compiler) |
| if with_cuda is None: |
| with_cuda = any(map(_is_cuda_file, sources)) |
| extra_ldflags = _prepare_ldflags( |
| extra_ldflags or [], |
| with_cuda, |
| verbose) |
| build_file_path = os.path.join(build_directory, 'build.ninja') |
| if verbose: |
| print( |
| 'Emitting ninja build file {}...'.format(build_file_path)) |
| # NOTE: Emitting a new ninja build file does not cause re-compilation if |
| # the sources did not change, so it's ok to re-emit (and it's fast). |
| _write_ninja_file_to_build_library( |
| path=build_file_path, |
| name=name, |
| sources=sources, |
| extra_cflags=extra_cflags or [], |
| extra_cuda_cflags=extra_cuda_cflags or [], |
| extra_ldflags=extra_ldflags or [], |
| extra_include_paths=extra_include_paths or [], |
| with_cuda=with_cuda) |
| |
| if verbose: |
| print('Building extension module {}...'.format(name)) |
| _run_ninja_build( |
| build_directory, |
| verbose, |
| error_prefix="Error building extension '{}'".format(name)) |
| |
| |
| def is_ninja_available(): |
| r''' |
| Returns ``True`` if the `ninja <https://ninja-build.org/>`_ build system is |
| available on the system, ``False`` otherwise. |
| ''' |
| with open(os.devnull, 'wb') as devnull: |
| try: |
| subprocess.check_call('ninja --version'.split(), stdout=devnull) |
| except OSError: |
| return False |
| else: |
| return True |
| |
| |
| def verify_ninja_availability(): |
| r''' |
| Raises ``RuntimeError`` if `ninja <https://ninja-build.org/>`_ build system is not |
| available on the system, does nothing otherwise. |
| ''' |
| if not is_ninja_available(): |
| raise RuntimeError("Ninja is required to load C++ extensions") |
| |
| |
| def _prepare_ldflags(extra_ldflags, with_cuda, verbose): |
| here = os.path.abspath(__file__) |
| torch_path = os.path.dirname(os.path.dirname(here)) |
| lib_path = os.path.join(torch_path, 'lib') |
| |
| if IS_WINDOWS: |
| python_path = os.path.dirname(sys.executable) |
| python_lib_path = os.path.join(python_path, 'libs') |
| |
| extra_ldflags.append('c10.lib') |
| if with_cuda: |
| extra_ldflags.append('c10_cuda.lib') |
| extra_ldflags.append('torch_cpu.lib') |
| if with_cuda: |
| extra_ldflags.append('torch_cuda.lib') |
| # /INCLUDE is used to ensure torch_cuda is linked against in a project that relies on it. |
| # Related issue: https://github.com/pytorch/pytorch/issues/31611 |
| extra_ldflags.append('-INCLUDE:?warp_size@cuda@at@@YAHXZ') |
| extra_ldflags.append('torch.lib') |
| extra_ldflags.append('torch_python.lib') |
| extra_ldflags.append('/LIBPATH:{}'.format(python_lib_path)) |
| extra_ldflags.append('/LIBPATH:{}'.format(lib_path)) |
| else: |
| extra_ldflags.append('-L{}'.format(lib_path)) |
| extra_ldflags.append('-lc10') |
| if with_cuda: |
| extra_ldflags.append('-lc10_hip' if IS_HIP_EXTENSION else '-lc10_cuda') |
| extra_ldflags.append('-ltorch_cpu') |
| if with_cuda: |
| extra_ldflags.append('-ltorch_hip' if IS_HIP_EXTENSION else '-ltorch_cuda') |
| extra_ldflags.append('-ltorch') |
| extra_ldflags.append('-ltorch_python') |
| |
| if with_cuda: |
| if verbose: |
| print('Detected CUDA files, patching ldflags') |
| if IS_WINDOWS: |
| extra_ldflags.append('/LIBPATH:{}'.format( |
| _join_cuda_home('lib/x64'))) |
| extra_ldflags.append('cudart.lib') |
| if CUDNN_HOME is not None: |
| extra_ldflags.append(os.path.join(CUDNN_HOME, 'lib/x64')) |
| elif not IS_HIP_EXTENSION: |
| extra_ldflags.append('-L{}'.format(_join_cuda_home('lib64'))) |
| extra_ldflags.append('-lcudart') |
| if CUDNN_HOME is not None: |
| extra_ldflags.append('-L{}'.format(os.path.join(CUDNN_HOME, 'lib64'))) |
| |
| return extra_ldflags |
| |
| |
| def _get_cuda_arch_flags(cflags=None): |
| r''' |
| Determine CUDA arch flags to use. |
| |
| For an arch, say "6.1", the added compile flag will be |
| ``-gencode=arch=compute_61,code=sm_61``. |
| For an added "+PTX", an additional |
| ``-gencode=arch=compute_xx,code=compute_xx`` is added. |
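| |
| For example, ``TORCH_CUDA_ARCH_LIST="6.1;7.0+PTX"`` results in (in some |
| order) ``-gencode=arch=compute_61,code=sm_61``, |
| ``-gencode=arch=compute_70,code=sm_70`` and |
| ``-gencode=arch=compute_70,code=compute_70``. |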
| |
| See select_compute_arch.cmake for corresponding named and supported arches |
| when building with CMake. |
| ''' |
| # If cflags is given, there may already be user-provided arch flags in it |
| # (from `extra_compile_args`) |
| if cflags is not None: |
| for flag in cflags: |
| if 'arch' in flag: |
| return [] |
| |
| # Note: keep combined names ("arch1+arch2") above single names, otherwise |
| # string replacement may not do the right thing |
| named_arches = collections.OrderedDict([ |
| ('Kepler+Tesla', '3.7'), |
| ('Kepler', '3.5+PTX'), |
| ('Maxwell+Tegra', '5.3'), |
| ('Maxwell', '5.0;5.2+PTX'), |
| ('Pascal', '6.0;6.1+PTX'), |
| ('Volta', '7.0+PTX'), |
| ('Turing', '7.5+PTX'), |
| ('Ampere', '8.0+PTX'), |
| ]) |
| |
| supported_arches = ['3.5', '3.7', '5.0', '5.2', '5.3', '6.0', '6.1', '6.2', |
| '7.0', '7.2', '7.5', '8.0'] |
| valid_arch_strings = supported_arches + [s + "+PTX" for s in supported_arches] |
| |
| # The default is sm_30 for CUDA 9.x and 10.x |
| # First check for an env var (same as used by the main setup.py) |
| # Can be one or more architectures, e.g. "6.1" or "3.5;5.2;6.0;6.1;7.0+PTX" |
| # See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake |
| arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None) |
| |
| # If not given, determine what's needed for the GPU that can be found |
| if not arch_list: |
| capability = torch.cuda.get_device_capability() |
| arch_list = ['{}.{}'.format(capability[0], capability[1])] |
| else: |
| # Deal with lists that are ' ' separated (only deal with ';' after) |
| arch_list = arch_list.replace(' ', ';') |
| # Expand named arches |
| for named_arch, archval in named_arches.items(): |
| arch_list = arch_list.replace(named_arch, archval) |
| |
| arch_list = arch_list.split(';') |
| |
| flags = [] |
| for arch in arch_list: |
| if arch not in valid_arch_strings: |
| raise ValueError("Unknown CUDA arch ({}) or GPU not supported".format(arch)) |
| else: |
| num = arch[0] + arch[2] |
| flags.append('-gencode=arch=compute_{},code=sm_{}'.format(num, num)) |
| if arch.endswith('+PTX'): |
| flags.append('-gencode=arch=compute_{},code=compute_{}'.format(num, num)) |
| |
| return list(set(flags)) |
| |
| |
| def _get_rocm_arch_flags(cflags=None): |
| # If cflags is given, there may already be user-provided arch flags in it |
| # (from `extra_compile_args`) |
| if cflags is not None: |
| for flag in cflags: |
| if 'amdgpu-target' in flag: |
| return ['-fno-gpu-rdc'] |
| return [ |
| '--amdgpu-target=gfx803', |
| '--amdgpu-target=gfx900', |
| '--amdgpu-target=gfx906', |
| '--amdgpu-target=gfx908', |
| '-fno-gpu-rdc' |
| ] |
| |
| |
| def _get_build_directory(name, verbose): |
| root_extensions_directory = os.environ.get('TORCH_EXTENSIONS_DIR') |
| if root_extensions_directory is None: |
| root_extensions_directory = get_default_build_root() |
| |
| if verbose: |
| print('Using {} as PyTorch extensions root...'.format( |
| root_extensions_directory)) |
| |
| build_directory = os.path.join(root_extensions_directory, name) |
| if not os.path.exists(build_directory): |
| if verbose: |
| print('Creating extension directory {}...'.format(build_directory)) |
| # This is like mkdir -p, i.e. will also create parent directories. |
| os.makedirs(build_directory, exist_ok=True) |
| |
| return build_directory |
| |
| |
| def _get_num_workers(verbose): |
| max_jobs = os.environ.get('MAX_JOBS') |
| if max_jobs is not None and max_jobs.isdigit(): |
| if verbose: |
| print('Using envvar MAX_JOBS ({}) as the number of workers...'.format(max_jobs)) |
| return int(max_jobs) |
| if verbose: |
| print('Allowing ninja to set a default number of workers... ' |
| '(overridable by setting the environment variable MAX_JOBS=N)') |
| return None |
| |
| |
| def _run_ninja_build(build_directory, verbose, error_prefix): |
| command = ['ninja', '-v'] |
| num_workers = _get_num_workers(verbose) |
| if num_workers is not None: |
| command.extend(['-j', str(num_workers)]) |
| env = os.environ.copy() |
| # Try to activate the vc env for the users |
| if IS_WINDOWS and 'VSCMD_ARG_TGT_ARCH' not in env: |
| from distutils.util import get_platform |
| from distutils._msvccompiler import _get_vc_env |
| |
| plat_name = get_platform() |
| plat_spec = PLAT_TO_VCVARS[plat_name] |
| |
| vc_env = _get_vc_env(plat_spec) |
| vc_env = {k.upper(): v for k, v in vc_env.items()} |
| for k, v in env.items(): |
| uk = k.upper() |
| if uk not in vc_env: |
| vc_env[uk] = v |
| env = vc_env |
| try: |
| sys.stdout.flush() |
| sys.stderr.flush() |
| if sys.version_info >= (3, 5): |
| # Warning: don't pass stdout=None to subprocess.run to get output. |
| # subprocess.run assumes that sys.__stdout__ has not been modified and |
| # attempts to write to it by default. However, when we call _run_ninja_build |
| # from ahead-of-time cpp extensions, the following happens: |
| # 1) If the stdout encoding is not utf-8, setuptools detaches __stdout__. |
| # https://github.com/pypa/setuptools/blob/7e97def47723303fafabe48b22168bbc11bb4821/setuptools/dist.py#L1110 |
| # (it probably shouldn't do this) |
| # 2) subprocess.run (on POSIX, with no stdout override) relies on |
| # __stdout__ not being detached: |
| # https://github.com/python/cpython/blob/c352e6c7446c894b13643f538db312092b351789/Lib/subprocess.py#L1214 |
| # To work around this, we pass in the fileno directly and hope that |
| # it is valid. |
| stdout_fileno = 1 |
| subprocess.run( |
| command, |
| stdout=stdout_fileno if verbose else subprocess.PIPE, |
| stderr=subprocess.STDOUT, |
| cwd=build_directory, |
| check=True, |
| env=env) |
| else: |
| subprocess.check_output( |
| command, |
| stderr=subprocess.STDOUT, |
| cwd=build_directory, |
| env=env) |
| except subprocess.CalledProcessError: |
| # Python 2 and 3 compatible way of getting the error object. |
| _, error, _ = sys.exc_info() |
| # error.output contains the stdout and stderr of the build attempt. |
| message = error_prefix |
| if hasattr(error, 'output') and error.output: |
| message += ": {}".format(error.output.decode()) |
| raise RuntimeError(message) |
| |
| |
| def _import_module_from_library(module_name, path, is_python_module): |
| # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path |
| file, path, description = imp.find_module(module_name, [path]) |
| # Close the .so file after load. |
| with file: |
| if is_python_module: |
| return imp.load_module(module_name, file, path, description) |
| else: |
| torch.ops.load_library(path) |
| |
| |
| def _write_ninja_file_to_build_library(path, |
| name, |
| sources, |
| extra_cflags, |
| extra_cuda_cflags, |
| extra_ldflags, |
| extra_include_paths, |
| with_cuda): |
| extra_cflags = [flag.strip() for flag in extra_cflags] |
| extra_cuda_cflags = [flag.strip() for flag in extra_cuda_cflags] |
| extra_ldflags = [flag.strip() for flag in extra_ldflags] |
| extra_include_paths = [flag.strip() for flag in extra_include_paths] |
| |
| # Turn into absolute paths so we can emit them into the ninja build |
| # file wherever it is. |
| user_includes = [os.path.abspath(file) for file in extra_include_paths] |
| |
| # include_paths() gives us the location of torch/extension.h |
| system_includes = include_paths(with_cuda) |
| # sysconfig.get_paths()['include'] gives us the location of Python.h |
| system_includes.append(sysconfig.get_paths()['include']) |
| |
| # Windows does not understand `-isystem`. |
| if IS_WINDOWS: |
| user_includes += system_includes |
| system_includes.clear() |
| |
| common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)] |
| common_cflags.append('-DTORCH_API_INCLUDE_EXTENSION_H') |
| common_cflags += ['-I{}'.format(include) for include in user_includes] |
| common_cflags += ['-isystem {}'.format(include) for include in system_includes] |
| |
| common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=' + str(int(torch._C._GLIBCXX_USE_CXX11_ABI))] |
| |
| if IS_WINDOWS: |
| cflags = common_cflags + COMMON_MSVC_FLAGS + extra_cflags |
| from distutils.spawn import _nt_quote_args |
| cflags = _nt_quote_args(cflags) |
| else: |
| cflags = common_cflags + ['-fPIC', '-std=c++14'] + extra_cflags |
| |
| if with_cuda and IS_HIP_EXTENSION: |
| cuda_flags = ['-DWITH_HIP'] + cflags + COMMON_HIPCC_FLAGS |
| cuda_flags += extra_cuda_cflags |
| cuda_flags += _get_rocm_arch_flags(cuda_flags) |
| sources = [s if not _is_cuda_file(s) else |
| os.path.abspath(os.path.join( |
| path, get_hip_file_path(os.path.relpath(s, path)))) |
| for s in sources] |
| elif with_cuda: |
| cuda_flags = common_cflags + COMMON_NVCC_FLAGS + _get_cuda_arch_flags() |
| if IS_WINDOWS: |
| for flag in COMMON_MSVC_FLAGS: |
| cuda_flags = ['-Xcompiler', flag] + cuda_flags |
| for ignore_warning in MSVC_IGNORE_CUDAFE_WARNINGS: |
| cuda_flags = ['-Xcudafe', '--diag_suppress=' + ignore_warning] + cuda_flags |
| cuda_flags = _nt_quote_args(cuda_flags) |
| cuda_flags += _nt_quote_args(extra_cuda_cflags) |
| else: |
| cuda_flags += ['--compiler-options', "'-fPIC'"] |
| cuda_flags += extra_cuda_cflags |
| if not any(flag.startswith('-std=') for flag in cuda_flags): |
| cuda_flags.append('-std=c++14') |
| else: |
| cuda_flags = None |
| |
| def object_file_path(source_file): |
| # '/path/to/file.cpp' -> 'file' |
| file_name = os.path.splitext(os.path.basename(source_file))[0] |
| if _is_cuda_file(source_file) and with_cuda: |
| # Use a different object filename in case a C++ and CUDA file have |
| # the same filename but different extension (.cpp vs. .cu). |
| target = '{}.cuda.o'.format(file_name) |
| else: |
| target = '{}.o'.format(file_name) |
| return target |
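| # For example (with with_cuda=True): |
| #   object_file_path('/path/to/ext.cpp')   -> 'ext.o' |
| #   object_file_path('/path/to/kernel.cu') -> 'kernel.cuda.o' |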
| |
| objects = list(map(object_file_path, sources)) |
| |
| if IS_WINDOWS: |
| ldflags = ['/DLL'] + extra_ldflags |
| else: |
| ldflags = ['-shared'] + extra_ldflags |
| # The darwin linker needs explicit consent to ignore unresolved symbols. |
| if sys.platform.startswith('darwin'): |
| ldflags.append('-undefined dynamic_lookup') |
| elif IS_WINDOWS: |
| ldflags = _nt_quote_args(ldflags) |
| |
| ext = 'pyd' if IS_WINDOWS else 'so' |
| library_target = '{}.{}'.format(name, ext) |
| |
| _write_ninja_file( |
| path=path, |
| cflags=cflags, |
| post_cflags=None, |
| cuda_cflags=cuda_flags, |
| cuda_post_cflags=None, |
| sources=sources, |
| objects=objects, |
| ldflags=ldflags, |
| library_target=library_target, |
| with_cuda=with_cuda) |
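| # Illustrative call (all names and paths below are hypothetical): |
| #   _write_ninja_file_to_build_library( |
| #       path='/tmp/my_ext/build.ninja', |
| #       name='my_ext', |
| #       sources=['/abs/path/my_ext.cpp'], |
| #       extra_cflags=['-O2'], |
| #       extra_cuda_cflags=[], |
| #       extra_ldflags=[], |
| #       extra_include_paths=[], |
| #       with_cuda=False) |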
| |
| |
| def _write_ninja_file(path, |
| cflags, |
| post_cflags, |
| cuda_cflags, |
| cuda_post_cflags, |
| sources, |
| objects, |
| ldflags, |
| library_target, |
| with_cuda): |
| r"""Write a ninja file that does the desired compiling and linking. |
| |
| `path`: Where to write this file |
| `cflags`: list of flags to pass to $cxx. Can be None. |
| `post_cflags`: list of flags to append to the $cxx invocation. Can be None. |
| `cuda_cflags`: list of flags to pass to $nvcc. Can be None. |
| `cuda_post_cflags`: list of flags to append to the $nvcc invocation. Can be None. |
| `sources`: list of paths to source files |
| `objects`: list of desired paths to objects, one per source. |
| `ldflags`: list of flags to pass to linker. Can be None. |
| `library_target`: Name of the output library. Can be None; in that case, |
| we do no linking. |
| `with_cuda`: If we should be compiling with CUDA. |
| """ |
| def sanitize_flags(flags): |
| if flags is None: |
| return [] |
| else: |
| return [flag.strip() for flag in flags] |
| |
| cflags = sanitize_flags(cflags) |
| post_cflags = sanitize_flags(post_cflags) |
| cuda_cflags = sanitize_flags(cuda_cflags) |
| cuda_post_cflags = sanitize_flags(cuda_post_cflags) |
| ldflags = sanitize_flags(ldflags) |
| |
| # Sanity checks... |
| assert len(sources) == len(objects) |
| assert len(sources) > 0 |
| |
| if IS_WINDOWS: |
| compiler = os.environ.get('CXX', 'cl') |
| else: |
| compiler = os.environ.get('CXX', 'c++') |
| |
| # Version 1.3 is required for the `deps` directive. |
| config = ['ninja_required_version = 1.3'] |
| config.append('cxx = {}'.format(compiler)) |
| if with_cuda: |
| if IS_HIP_EXTENSION: |
| nvcc = _join_rocm_home('bin', 'hipcc') |
| else: |
| nvcc = _join_cuda_home('bin', 'nvcc') |
| config.append('nvcc = {}'.format(nvcc)) |
| |
| flags = ['cflags = {}'.format(' '.join(cflags))] |
| flags.append('post_cflags = {}'.format(' '.join(post_cflags))) |
| if with_cuda: |
| flags.append('cuda_cflags = {}'.format(' '.join(cuda_cflags))) |
| flags.append('cuda_post_cflags = {}'.format(' '.join(cuda_post_cflags))) |
| flags.append('ldflags = {}'.format(' '.join(ldflags))) |
| |
| # Turn into absolute paths so we can emit them into the ninja build |
| # file wherever it is. |
| sources = [os.path.abspath(file) for file in sources] |
| |
| # See https://ninja-build.org/build.ninja.html for reference. |
| compile_rule = ['rule compile'] |
| if IS_WINDOWS: |
| compile_rule.append( |
| ' command = cl /showIncludes $cflags -c $in /Fo$out $post_cflags') |
| compile_rule.append(' deps = msvc') |
| else: |
| compile_rule.append( |
| ' command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags') |
| compile_rule.append(' depfile = $out.d') |
| compile_rule.append(' deps = gcc') |
| |
| if with_cuda: |
| cuda_compile_rule = ['rule cuda_compile'] |
| cuda_compile_rule.append( |
| ' command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags') |
| |
| # Emit one build statement (edge) per source file to enable incremental builds. |
| build = [] |
| for source_file, object_file in zip(sources, objects): |
| is_cuda_source = _is_cuda_file(source_file) and with_cuda |
| rule = 'cuda_compile' if is_cuda_source else 'compile' |
| if IS_WINDOWS: |
| source_file = source_file.replace(':', '$:') |
| object_file = object_file.replace(':', '$:') |
| source_file = source_file.replace(" ", "$ ") |
| object_file = object_file.replace(" ", "$ ") |
| build.append('build {}: {} {}'.format(object_file, rule, source_file)) |
| |
| if library_target is not None: |
| link_rule = ['rule link'] |
| if IS_WINDOWS: |
| # `where cl` raises CalledProcessError when cl is not on the PATH, so a |
| # length check on its output could never fail; catch the error instead. |
| try: |
| cl_paths = subprocess.check_output(['where', |
| 'cl']).decode().split('\r\n') |
| except subprocess.CalledProcessError: |
| raise RuntimeError("MSVC is required to load C++ extensions") |
| cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:') |
| link_rule.append( |
| ' command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format( |
| cl_path)) |
| else: |
| link_rule.append(' command = $cxx $in $ldflags -o $out') |
| |
| link = ['build {}: link {}'.format(library_target, ' '.join(objects))] |
| |
| default = ['default {}'.format(library_target)] |
| else: |
| link_rule, link, default = [], [], [] |
| |
| # 'Blocks' should be separated by newlines, for visual benefit. |
| blocks = [config, flags, compile_rule] |
| if with_cuda: |
| blocks.append(cuda_compile_rule) |
| blocks += [link_rule, build, link, default] |
| with open(path, 'w') as build_file: |
| for block in blocks: |
| lines = '\n'.join(block) |
| build_file.write('{}\n\n'.format(lines)) |
| |
| |
| def _join_cuda_home(*paths): |
| r''' |
| Joins paths with CUDA_HOME, or raises an error if CUDA_HOME is not set. |
| |
| This is basically a lazy way of raising an error for missing $CUDA_HOME |
| only once we need to get any CUDA-specific path. |
| ''' |
| if CUDA_HOME is None: |
| raise EnvironmentError('CUDA_HOME environment variable is not set. ' |
| 'Please set it to your CUDA install root.') |
| return os.path.join(CUDA_HOME, *paths) |
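| # For example, with CUDA_HOME='/usr/local/cuda' (illustrative): |
| #   _join_cuda_home('bin', 'nvcc') -> '/usr/local/cuda/bin/nvcc' |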
| |
| |
| def _is_cuda_file(path): |
| valid_ext = ['.cu', '.cuh'] |
| if IS_HIP_EXTENSION: |
| valid_ext.append('.hip') |
| return os.path.splitext(path)[1] in valid_ext |
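| # For example, with IS_HIP_EXTENSION=False: |
| #   _is_cuda_file('kernel.cu')  -> True |
| #   _is_cuda_file('header.cuh') -> True |
| #   _is_cuda_file('module.cpp') -> False |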