torch/utils/cpp_extension.py - platform/external/pytorch - Git at Google

 import copy
 import glob
 import imp
 import os
 import re
 import setuptools
 import subprocess
 import sys
 import sysconfig
 import tempfile
 import warnings

 import torch
 from .file_baton import FileBaton

 from setuptools.command.build_ext import build_ext


 def _find_cuda_home():
     '''
     Finds the CUDA install path.

     Three guesses are tried in order:

     1. The ``CUDA_HOME`` or ``CUDA_PATH`` environment variable.
     2. The conventional install location for the current platform.
     3. The directory two levels above the ``nvcc`` executable found on the
        search path.

     Returns:
         The CUDA root directory as a string, or ``None`` if every guess
         failed.
     '''
     # Guess #1
     cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
     if cuda_home is None:
         # Guess #2
         if sys.platform == 'win32':
             # glob.glob() returns a (possibly empty) list, which must not be
             # handed to os.path.exists() or returned to the caller as-is.
             candidates = glob.glob(
                 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
             cuda_home = candidates[0] if candidates else ''
         else:
             cuda_home = '/usr/local/cuda'
         if not os.path.exists(cuda_home):
             # Guess #3
             try:
                 which = 'where' if sys.platform == 'win32' else 'which'
                 output = subprocess.check_output([which, 'nvcc']).decode()
                 # `where` may list several matches, one per line; use the
                 # first one.
                 nvcc = output.splitlines()[0]
                 cuda_home = os.path.dirname(os.path.dirname(nvcc))
             except Exception:
                 cuda_home = None
     return cuda_home


 # Oldest compiler versions that check_compiler_abi_compatibility() accepts
 # without warning: (major, minor) for GCC and (major, minor, revision) for
 # MSVC. Detected versions are compared against these as tuples.
 MINIMUM_GCC_VERSION = (4, 9)
 MINIMUM_MSVC_VERSION = (19, 0, 24215)
 ABI_INCOMPATIBILITY_WARNING = '''

                                !! WARNING !!

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 Your compiler ({}) may be ABI-incompatible with PyTorch!
 Please use a compiler that is ABI-compatible with GCC 4.9 and above.
 See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

 See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
 for instructions on how to install GCC 4.9 or higher.
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                               !! WARNING !!
 '''
 # Resolved once at import time. Stays None when torch.cuda.is_available() is
 # false (or when _find_cuda_home() finds nothing), in which case
 # _join_cuda_home() raises as soon as a CUDA path is requested.
 CUDA_HOME = _find_cuda_home() if torch.cuda.is_available() else None


 def check_compiler_abi_compatibility(compiler):
     '''
     Verifies that the given compiler is ABI-compatible with PyTorch.

     The compiler is invoked (with ``--version`` everywhere except Windows)
     and its banner is searched for a GCC or MSVC version number, which is
     then compared against ``MINIMUM_GCC_VERSION`` or ``MINIMUM_MSVC_VERSION``.
     A warning is issued whenever compatibility cannot be confirmed, including
     when the compiler could not be run or is neither GCC nor MSVC.

     Arguments:
         compiler (str): The compiler executable name to check (e.g. ``g++``).
             Must be executable in a shell process.

     Returns:
         False if the compiler is (likely) ABI-incompatible with PyTorch,
         else True.
     '''
     try:
         check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
         info = subprocess.check_output(
             check_cmd.format(compiler).split(), stderr=subprocess.STDOUT)
     except Exception:
         _, error, _ = sys.exc_info()
         warnings.warn('Error checking compiler version: {}'.format(error))
     else:
         # Decode and lowercase once; every check below is written against
         # the lowercased banner.
         info = info.decode().lower()
         if 'gcc' in info or 'g++' in info:
             # Sometimes the version is given as "major.x" instead of semver.
             version = re.search(r'(\d+)\.(\d+|x)', info)
             if version is not None:
                 major, minor = version.groups()
                 minor = 0 if minor == 'x' else int(minor)
                 if (int(major), minor) >= MINIMUM_GCC_VERSION:
                     return True
                 else:
                     # Append the detected version for the warning.
                     compiler = '{} {}'.format(compiler, version.group(0))
         elif 'microsoft' in info:
             version = re.search(r'(\d+)\.(\d+)\.(\d+)', info)
             if version is not None:
                 major, minor, revision = version.groups()
                 if (int(major), int(minor),
                         int(revision)) >= MINIMUM_MSVC_VERSION:
                     return True
                 else:
                     # Append the detected version for the warning.
                     compiler = '{} {}'.format(compiler, version.group(0))

     warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler))
     return False


 class BuildExtension(build_ext):
     '''
     A custom :mod:`setuptools` build extension.

     This :class:`setuptools.build_ext` subclass takes care of passing the
     minimum required compiler flags (e.g. ``-std=c++11``) as well as mixed
     C++/CUDA compilation (and support for CUDA files in general).

     When using :class:`BuildExtension`, it is allowed to supply a dictionary
     for ``extra_compile_args`` (rather than the usual list) that maps from
     languages (``cxx`` or ``nvcc``) to a list of additional compiler flags to
     supply to the compiler. This makes it possible to supply different flags to
     the C++ and CUDA compiler during mixed compilation.
     '''

     def build_extensions(self):
         '''
         Builds all extensions with CUDA-aware compilation.

         The compiler object's compile entry point (``compile`` for MSVC,
         ``_compile`` otherwise) is replaced by a wrapper that routes ``.cu``
         and ``.cuh`` sources to nvcc and selects the matching flag list from
         ``extra_compile_args``, then the regular build is run.
         '''
         self._check_abi()
         for extension in self.extensions:
             self._define_torch_extension_name(extension)

         # Register .cu and .cuh as valid source extensions.
         self.compiler.src_extensions += ['.cu', '.cuh']
         # Save the original _compile method for later.
         if self.compiler.compiler_type == 'msvc':
             self.compiler._cpp_extensions += ['.cu', '.cuh']
             original_compile = self.compiler.compile
             original_spawn = self.compiler.spawn
         else:
             original_compile = self.compiler._compile

         def unix_wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
             # Copy before we make any modifications.
             cflags = copy.deepcopy(extra_postargs)
             # Bound before the try block so that the finally clause can
             # always restore it.
             original_compiler = self.compiler.compiler_so
             try:
                 if _is_cuda_file(src):
                     nvcc = _join_cuda_home('bin', 'nvcc')
                     self.compiler.set_executable('compiler_so', nvcc)
                     if isinstance(cflags, dict):
                         cflags = cflags['nvcc']
                     cflags += ['--compiler-options', "'-fPIC'"]
                 elif isinstance(cflags, dict):
                     cflags = cflags['cxx']
                 # NVCC does not allow multiple -std to be passed, so we avoid
                 # overriding the option if the user explicitly passed it.
                 if not any(flag.startswith('-std=') for flag in cflags):
                     cflags.append('-std=c++11')

                 original_compile(obj, src, ext, cc_args, cflags, pp_opts)
             finally:
                 # Put the original compiler back in place.
                 self.compiler.set_executable('compiler_so', original_compiler)

         def win_wrap_compile(sources,
                              output_dir=None,
                              macros=None,
                              include_dirs=None,
                              debug=0,
                              extra_preargs=None,
                              extra_postargs=None,
                              depends=None):

             # The flags are applied per command inside spawn() below, so they
             # are withheld from the original compile call.
             self.cflags = copy.deepcopy(extra_postargs)
             extra_postargs = None

             def spawn(cmd):
                 # Using regex to match src, obj and include files

                 src_regex = re.compile('/T(p|c)(.*)')
                 src_list = [
                     m.group(2) for m in (src_regex.match(elem) for elem in cmd)
                     if m
                 ]

                 obj_regex = re.compile('/Fo(.*)')
                 obj_list = [
                     m.group(1) for m in (obj_regex.match(elem) for elem in cmd)
                     if m
                 ]

                 include_regex = re.compile(r'((\-|\/)I.*)')
                 include_list = [
                     m.group(1)
                     for m in (include_regex.match(elem) for elem in cmd) if m
                 ]

                 if len(src_list) >= 1 and len(obj_list) >= 1:
                     src = src_list[0]
                     obj = obj_list[0]
                     if _is_cuda_file(src):
                         # Replace the whole command line with an nvcc call
                         # that reuses the include flags found above.
                         nvcc = _join_cuda_home('bin', 'nvcc')
                         if isinstance(self.cflags, dict):
                             cflags = self.cflags['nvcc']
                         elif isinstance(self.cflags, list):
                             cflags = self.cflags
                         else:
                             cflags = []
                         cmd = [
                             nvcc, '-c', src, '-o', obj, '-Xcompiler',
                             '/wd4819', '-Xcompiler', '/MD'
                         ] + include_list + cflags
                     elif isinstance(self.cflags, dict):
                         cflags = self.cflags['cxx']
                         cmd += cflags
                     elif isinstance(self.cflags, list):
                         cflags = self.cflags
                         cmd += cflags

                 return original_spawn(cmd)

             try:
                 self.compiler.spawn = spawn
                 return original_compile(sources, output_dir, macros,
                                         include_dirs, debug, extra_preargs,
                                         extra_postargs, depends)
             finally:
                 self.compiler.spawn = original_spawn

         # Monkey-patch the _compile method.
         if self.compiler.compiler_type == 'msvc':
             self.compiler.compile = win_wrap_compile
         else:
             self.compiler._compile = unix_wrap_compile

         build_ext.build_extensions(self)

     def _check_abi(self):
         '''Runs the ABI compatibility check on the C++ compiler in use.'''
         # On some platforms, like Windows, compiler_cxx is not available.
         if hasattr(self.compiler, 'compiler_cxx'):
             compiler = self.compiler.compiler_cxx[0]
         elif sys.platform == 'win32':
             compiler = os.environ.get('CXX', 'cl')
         else:
             compiler = os.environ.get('CXX', 'c++')
         check_compiler_abi_compatibility(compiler)

     def _define_torch_extension_name(self, extension):
         '''
         Appends ``-DTORCH_EXTENSION_NAME=<name>`` to the extension's compile
         arguments (to every flag list when a dictionary was supplied).
         '''
         # The macro is meant to be used as the pybind11 module name, which
         # must be a plain identifier. For an extension that lives inside a
         # package (e.g. ``pkg.sub.ext``) only the last component qualifies.
         library_name = extension.name.split('.')[-1]
         define = '-DTORCH_EXTENSION_NAME={}'.format(library_name)
         if isinstance(extension.extra_compile_args, dict):
             for args in extension.extra_compile_args.values():
                 args.append(define)
         else:
             extension.extra_compile_args.append(define)


 def CppExtension(name, sources, *args, **kwargs):
     '''
     Creates a :class:`setuptools.Extension` for C++.

     Convenience method that creates a :class:`setuptools.Extension` with the
     bare minimum (but often sufficient) arguments to build a C++ extension.
     The PyTorch include paths are appended to ``include_dirs``; on Windows
     the PyTorch library path and the ``ATen`` and ``_C`` libraries are added
     as well. ``language`` is always set to ``c++``.

     All arguments are forwarded to the :class:`setuptools.Extension`
     constructor. Lists passed in by the caller are not modified.

     Example:
         >>> from setuptools import setup
         >>> from torch.utils.cpp_extension import BuildExtension, CppExtension
         >>> setup(
                 name='extension',
                 ext_modules=[
                     CppExtension(
                         name='extension',
                         sources=['extension.cpp'],
                         extra_compile_args=['-g']),
                 ],
                 cmdclass={
                     'build_ext': BuildExtension
                 })
     '''
     # Build fresh lists rather than extending the caller's objects in place,
     # so a list shared between several extensions does not keep growing.
     kwargs['include_dirs'] = (
         list(kwargs.get('include_dirs') or []) + include_paths())

     if sys.platform == 'win32':
         kwargs['library_dirs'] = (
             list(kwargs.get('library_dirs') or []) + library_paths())
         kwargs['libraries'] = (
             list(kwargs.get('libraries') or []) + ['ATen', '_C'])

     kwargs['language'] = 'c++'
     return setuptools.Extension(name, sources, *args, **kwargs)


 def CUDAExtension(name, sources, *args, **kwargs):
     '''
     Creates a :class:`setuptools.Extension` for CUDA/C++.

     Convenience method that creates a :class:`setuptools.Extension` with the
     bare minimum (but often sufficient) arguments to build a CUDA/C++
     extension. This includes the CUDA include path, library path and runtime
     library. On Windows the ``ATen`` and ``_C`` libraries are added as well.
     ``language`` is always set to ``c++``.

     All arguments are forwarded to the :class:`setuptools.Extension`
     constructor. Lists passed in by the caller are not modified.

     Example:
         >>> from setuptools import setup
         >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
         >>> setup(
                 name='cuda_extension',
                 ext_modules=[
                     CUDAExtension(
                             name='cuda_extension',
                             sources=['extension.cpp', 'extension_kernel.cu'],
                             extra_compile_args={'cxx': ['-g'],
                                                 'nvcc': ['-O2']})
                 ],
                 cmdclass={
                     'build_ext': BuildExtension
                 })
     '''
     # Build fresh lists rather than extending the caller's objects in place,
     # so a list shared between several extensions does not keep growing.
     kwargs['library_dirs'] = (
         list(kwargs.get('library_dirs') or []) + library_paths(cuda=True))

     libraries = list(kwargs.get('libraries') or [])
     libraries.append('cudart')
     if sys.platform == 'win32':
         libraries.append('ATen')
         libraries.append('_C')
     kwargs['libraries'] = libraries

     kwargs['include_dirs'] = (
         list(kwargs.get('include_dirs') or []) + include_paths(cuda=True))

     kwargs['language'] = 'c++'

     return setuptools.Extension(name, sources, *args, **kwargs)


 def include_paths(cuda=False):
     '''
     Collects the header directories needed to compile a C++ or CUDA
     extension.

     Args:
         cuda: If `True`, the CUDA include directory is appended as well.

     Returns:
         A list of include path strings.
     '''
     torch_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     include_root = os.path.join(torch_root, 'lib', 'include')
     result = [include_root]
     # Some older internal Torch headers include their siblings without a
     # directory prefix, so the TH and THC folders themselves have to be on
     # the search path too.
     result.extend(
         os.path.join(include_root, legacy) for legacy in ('TH', 'THC'))
     if cuda:
         result.append(_join_cuda_home('include'))
     return result


 def library_paths(cuda=False):
     '''
     Collects the library directories needed to link a C++ or CUDA extension.

     On Windows the ``lib`` folder of the torch package is always included.

     Args:
         cuda: If `True`, the CUDA library directory is appended as well.

     Returns:
         A list of library path strings.
     '''
     on_windows = sys.platform == 'win32'
     result = []

     if on_windows:
         torch_root = os.path.dirname(
             os.path.dirname(os.path.abspath(__file__)))
         result.append(os.path.join(torch_root, 'lib'))

     if cuda:
         cuda_lib = 'lib/x64' if on_windows else 'lib64'
         result.append(_join_cuda_home(cuda_lib))
     return result


 def load(name,
          sources,
          extra_cflags=None,
          extra_cuda_cflags=None,
          extra_ldflags=None,
          extra_include_paths=None,
          build_directory=None,
          verbose=False):
     '''
     Compiles a PyTorch C++ extension just-in-time (JIT) and imports it.

     A Ninja build file is written for the given sources, Ninja is run to
     produce a dynamic library, and that library is imported into the current
     Python process. The resulting module is returned, ready for use.

     Unless told otherwise, the build file and the compiled library live in
     ``<tmp>/torch_extensions/<name>``, with ``<tmp>`` being the platform's
     temporary folder and ``<name>`` the extension name. Two overrides exist.
     Setting the ``TORCH_EXTENSIONS_DIR`` environment variable replaces
     ``<tmp>/torch_extensions``, so every extension is built in a subfolder of
     that directory. Passing ``build_directory`` replaces the whole path, so
     the library is built directly in that folder.

     Sources are compiled with the default system compiler (``c++``) unless
     the ``CXX`` environment variable names another one. Additional arguments
     for the build can be given through ``extra_cflags`` and
     ``extra_ldflags``; for instance ``extra_cflags=['-O3']`` turns on
     optimizations. ``extra_cflags`` may also carry further include
     directories.

     Mixed C++/CUDA builds are supported: CUDA sources (``.cu`` or ``.cuh``)
     may simply be listed next to the other sources. They are detected and
     compiled with nvcc instead of the C++ compiler, the CUDA lib64 directory
     is passed as a library directory, and ``cudart`` is linked. Use
     ``extra_cuda_cflags`` to hand extra flags to nvcc, in the same way
     ``extra_cflags`` works for C++. The CUDA install directory is located by
     a few heuristics that usually work; when they do not, setting the
     ``CUDA_HOME`` environment variable is the safest option.

     Args:
         name: The name of the extension to build. This MUST be the same as the
             name of the pybind11 module!
         sources: A list of relative or absolute paths to C++ source files.
             A single path string is accepted as well.
         extra_cflags: optional list of compiler flags to forward to the build.
         extra_cuda_cflags: optional list of compiler flags to forward to nvcc
             when building CUDA sources.
         extra_ldflags: optional list of linker flags to forward to the build.
         extra_include_paths: optional list of include directories to forward
             to the build.
         build_directory: optional path to use as build workspace.
         verbose: If ``True``, turns on verbose logging of load steps.

     Returns:
         The loaded PyTorch extension as a Python module.

     Example:
         >>> from torch.utils.cpp_extension import load
         >>> module = load(
                 name='extension',
                 sources=['extension.cpp', 'extension_kernel.cu'],
                 extra_cflags=['-O2'],
                 verbose=True)
     '''

     verify_ninja_availability()

     # A lone path is treated as a one-element list of sources.
     if isinstance(sources, str):
         sources = [sources]

     if build_directory is None:
         build_directory = _get_build_directory(name, verbose)

     lock = FileBaton(os.path.join(build_directory, 'lock'))

     if not lock.try_acquire():
         # The baton could not be acquired, so wait on it instead of building.
         lock.wait()
     else:
         try:
             with_cuda = any(_is_cuda_file(source) for source in sources)
             extra_ldflags = _prepare_ldflags(
                 extra_ldflags or [], with_cuda, verbose)
             ninja_file = os.path.join(build_directory, 'build.ninja')
             if verbose:
                 print('Emitting ninja build file {}...'.format(ninja_file))
             # NOTE: Re-emitting the build file is cheap and does not trigger
             # re-compilation when the sources are unchanged.
             _write_ninja_file(
                 path=ninja_file,
                 name=name,
                 sources=sources,
                 extra_cflags=extra_cflags or [],
                 extra_cuda_cflags=extra_cuda_cflags or [],
                 extra_ldflags=extra_ldflags or [],
                 extra_include_paths=extra_include_paths or [],
                 with_cuda=with_cuda)

             if verbose:
                 print('Building extension module {}...'.format(name))
             _build_extension_module(name, build_directory)
         finally:
             lock.release()

     if verbose:
         print('Loading extension module {}...'.format(name))
     return _import_module_from_library(name, build_directory)


 def verify_ninja_availability():
     '''
     Checks that the `ninja <https://ninja-build.org/>`_ build system can be
     launched on this system by running ``ninja --version``.

     Nothing is returned when the check succeeds.

     Raises:
         RuntimeError: if starting ``ninja`` fails with an ``OSError``.
     '''
     command = ['ninja', '--version']
     # Ninja's version banner is of no interest, so stdout goes to the null
     # device.
     with open(os.devnull, 'wb') as sink:
         try:
             subprocess.check_call(command, stdout=sink)
         except OSError:
             raise RuntimeError("Ninja is required to load C++ extensions")


 def _prepare_ldflags(extra_ldflags, with_cuda, verbose):
     '''
     Returns the linker flags for a JIT build.

     On Windows the ``ATen`` and ``_C`` import libraries plus the Python and
     torch library directories are added. When ``with_cuda`` is true the CUDA
     library directory and ``cudart`` are added as well.

     Args:
         extra_ldflags: list of user-supplied linker flags. It is not
             modified.
         with_cuda: whether CUDA sources are part of the build.
         verbose: If ``True``, prints a note when CUDA flags are added.

     Returns:
         A new list holding ``extra_ldflags`` followed by the added flags.
     '''
     # Work on a copy: appending to the caller's list would make the flags
     # pile up when the same list is passed to several load() calls.
     ldflags = list(extra_ldflags)

     if sys.platform == 'win32':
         python_path = os.path.dirname(sys.executable)
         python_lib_path = os.path.join(python_path, 'libs')

         here = os.path.abspath(__file__)
         torch_path = os.path.dirname(os.path.dirname(here))
         lib_path = os.path.join(torch_path, 'lib')

         ldflags.append('ATen.lib')
         ldflags.append('_C.lib')
         ldflags.append('/LIBPATH:{}'.format(python_lib_path))
         ldflags.append('/LIBPATH:{}'.format(lib_path))

     if with_cuda:
         if verbose:
             print('Detected CUDA files, patching ldflags')
         if sys.platform == 'win32':
             ldflags.append('/LIBPATH:{}'.format(
                 _join_cuda_home('lib/x64')))
             ldflags.append('cudart.lib')
         else:
             ldflags.append('-L{}'.format(_join_cuda_home('lib64')))
             ldflags.append('-lcudart')

     return ldflags


 def _get_build_directory(name, verbose):
     '''
     Returns the build directory for the extension ``name``, creating it if
     necessary.

     The directory is ``<root>/<name>``, where ``<root>`` is the value of the
     ``TORCH_EXTENSIONS_DIR`` environment variable if set, and
     ``<tmp>/torch_extensions`` otherwise.

     Args:
         name: the extension name, used as the last path component.
         verbose: If ``True``, prints the root and any directory creation.

     Returns:
         The path of the (now existing) build directory.
     '''
     root_extensions_directory = os.environ.get('TORCH_EXTENSIONS_DIR')
     if root_extensions_directory is None:
         # tempfile.gettempdir() will be /tmp on UNIX and \TEMP on Windows.
         root_extensions_directory = os.path.join(tempfile.gettempdir(),
                                                  'torch_extensions')

     if verbose:
         print('Using {} as PyTorch extensions root...'.format(
             root_extensions_directory))

     build_directory = os.path.join(root_extensions_directory, name)
     if not os.path.exists(build_directory):
         if verbose:
             print('Creating extension directory {}...'.format(build_directory))
         try:
             # This is like mkdir -p, i.e. will also create parent directories.
             os.makedirs(build_directory)
         except OSError:
             # Another process may have created the directory between the
             # existence check and makedirs(); that is not an error.
             if not os.path.isdir(build_directory):
                 raise

     return build_directory


 def _build_extension_module(name, build_directory):
     '''
     Runs ``ninja -v`` inside ``build_directory``.

     Raises:
         RuntimeError: if ninja exits with a non-zero status. The message
             carries the extension name and the captured build output.
     '''
     command = ['ninja', '-v']
     try:
         subprocess.check_output(
             command, stderr=subprocess.STDOUT, cwd=build_directory)
     except subprocess.CalledProcessError as error:
         # stderr was merged into stdout above, so error.output holds both
         # streams of the failed build.
         build_log = error.output.decode()
         raise RuntimeError("Error building extension '{}': {}".format(
             name, build_log))


 def _import_module_from_library(module_name, path):
     '''
     Imports the module ``module_name`` found in the directory ``path`` and
     returns it.
     '''
     # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
     handle, location, description = imp.find_module(module_name, [path])
     # The file handle is only needed while loading; close it afterwards.
     with handle:
         module = imp.load_module(module_name, handle, location, description)
     return module


 def _write_ninja_file(path,
                       name,
                       sources,
                       extra_cflags,
                       extra_cuda_cflags,
                       extra_ldflags,
                       extra_include_paths,
                       with_cuda=False):
     '''
     Writes a Ninja build file that compiles ``sources`` into one extension
     library.

     The file is made of blocks separated by blank lines: variable
     definitions, flags, the compile rule(s), the link rule, one build
     statement per source, the link statement and the default target.

     Args:
         path: where the build file is written.
         name: extension name; used for ``-DTORCH_EXTENSION_NAME`` and as the
             base name of the library target.
         sources: source file paths; they are made absolute before emission.
         extra_cflags: flags appended to the C++ compile flags.
         extra_cuda_cflags: flags appended to the nvcc flags (only in the
             non-Windows branch).
         extra_ldflags: flags placed after ``-shared`` (``/DLL`` on Windows).
         extra_include_paths: additional include directories; made absolute
             and emitted as ``-I`` flags.
         with_cuda: if true, the ``nvcc`` variable, ``cuda_flags`` and the
             ``cuda_compile`` rule are emitted as well.
     '''
     # Version 1.3 is required for the `deps` directive.
     config = ['ninja_required_version = 1.3']
     config.append('cxx = {}'.format(os.environ.get('CXX', 'c++')))
     if with_cuda:
         config.append('nvcc = {}'.format(_join_cuda_home('bin', 'nvcc')))

     # Turn into absolute paths so we can emit them into the ninja build
     # file wherever it is.
     sources = [os.path.abspath(file) for file in sources]
     includes = [os.path.abspath(file) for file in extra_include_paths]

     # include_paths() gives us the location of torch/torch.h
     includes += include_paths(with_cuda)
     # sysconfig.get_paths()['include'] gives us the location of Python.h
     includes.append(sysconfig.get_paths()['include'])

     # Flags shared by the C++ and the CUDA compile commands.
     common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
     common_cflags += ['-I{}'.format(include) for include in includes]

     cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
     if sys.platform == 'win32':
         # Imported here because it is only used in the win32 branches of
         # this function.
         from distutils.spawn import _nt_quote_args
         cflags = _nt_quote_args(cflags)
     flags = ['cflags = {}'.format(' '.join(cflags))]

     if with_cuda:
         # NOTE(review): this aliases common_cflags rather than copying it, so
         # the += below extends that list too. cflags was already built as a
         # separate list above and is unaffected.
         cuda_flags = common_cflags
         if sys.platform == 'win32':
             # NOTE(review): extra_cuda_cflags and the -std default are only
             # applied in the else branch -- confirm that skipping them on
             # win32 is intended.
             cuda_flags = _nt_quote_args(cuda_flags)
         else:
             cuda_flags += ['--compiler-options', "'-fPIC'"]
             cuda_flags += extra_cuda_cflags
             # Only add a default -std when none was supplied.
             if not any(flag.startswith('-std=') for flag in cuda_flags):
                 cuda_flags.append('-std=c++11')

         flags.append('cuda_flags = {}'.format(' '.join(cuda_flags)))

     if sys.platform == 'win32':
         ldflags = ['/DLL'] + extra_ldflags
     else:
         ldflags = ['-shared'] + extra_ldflags
     # The darwin linker needs explicit consent to ignore unresolved symbols.
     if sys.platform == 'darwin':
         ldflags.append('-undefined dynamic_lookup')
     elif sys.platform == 'win32':
         ldflags = _nt_quote_args(ldflags)
     flags.append('ldflags = {}'.format(' '.join(ldflags)))

     # See https://ninja-build.org/build.ninja.html for reference.
     compile_rule = ['rule compile']
     if sys.platform == 'win32':
         compile_rule.append(
             '  command = cl /showIncludes $cflags -c $in /Fo$out')
         compile_rule.append('  deps = msvc')
     else:
         compile_rule.append(
             '  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out')
         compile_rule.append('  depfile = $out.d')
         compile_rule.append('  deps = gcc')

     if with_cuda:
         cuda_compile_rule = ['rule cuda_compile']
         cuda_compile_rule.append(
             '  command = $nvcc $cuda_flags -c $in -o $out')

     link_rule = ['rule link']
     if sys.platform == 'win32':
         # link.exe is taken from the directory of the first `cl` that
         # `where` reports.
         cl_paths = subprocess.check_output(['where',
                                             'cl']).decode().split('\r\n')
         if len(cl_paths) >= 1:
             # ':' is written as '$:' in the emitted path.
             cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:')
         else:
             raise RuntimeError("MSVC is required to load C++ extensions")
         link_rule.append(
             '  command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format(
                 cl_path))
     else:
         link_rule.append('  command = $cxx $ldflags $in -o $out')

     # Emit one build rule per source to enable incremental build.
     object_files = []
     build = []
     for source_file in sources:
         # '/path/to/file.cpp' -> 'file'
         file_name = os.path.splitext(os.path.basename(source_file))[0]
         if _is_cuda_file(source_file):
             rule = 'cuda_compile'
             # Use a different object filename in case a C++ and CUDA file have
             # the same filename but different extension (.cpp vs. .cu).
             target = '{}.cuda.o'.format(file_name)
         else:
             rule = 'compile'
             target = '{}.o'.format(file_name)
         object_files.append(target)
         if sys.platform == 'win32':
             source_file = source_file.replace(':', '$:')
         build.append('build {}: {} {}'.format(target, rule, source_file))

     # The library target is <name>.pyd on Windows and <name>.so elsewhere.
     ext = '.pyd' if sys.platform == 'win32' else '.so'
     library_target = '{}{}'.format(name, ext)
     link = ['build {}: link {}'.format(library_target, ' '.join(object_files))]

     default = ['default {}'.format(library_target)]

     # 'Blocks' should be separated by newlines, for visual benefit.
     blocks = [config, flags, compile_rule]
     if with_cuda:
         blocks.append(cuda_compile_rule)
     blocks += [link_rule, build, link, default]
     with open(path, 'w') as build_file:
         for block in blocks:
             lines = '\n'.join(block)
             build_file.write('{}\n\n'.format(lines))


 def _join_cuda_home(*paths):
     '''
     Builds a path below ``CUDA_HOME`` from the given components.

     Doing the check here means a missing ``CUDA_HOME`` is only reported at
     the moment a CUDA-specific path is actually requested.

     Raises:
         EnvironmentError: if ``CUDA_HOME`` is ``None``.
     '''
     if CUDA_HOME is not None:
         return os.path.join(CUDA_HOME, *paths)
     raise EnvironmentError('CUDA_HOME environment variable is not set. '
                            'Please set it to your CUDA install root.')


 def _is_cuda_file(path):
     '''Tells whether ``path`` has a CUDA file extension (``.cu``/``.cuh``).'''
     _, extension = os.path.splitext(path)
     return extension in ('.cu', '.cuh')