| #!/usr/bin/env python |
| """ The Python Hipify script. |
| ## |
| # Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. |
| # 2017-2018 Advanced Micro Devices, Inc. and |
| # Facebook Inc. All rights reserved. |
| # |
| # Permission is hereby granted, free of charge, to any person obtaining a copy |
| # of this software and associated documentation files (the "Software"), to deal |
| # in the Software without restriction, including without limitation the rights |
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| # copies of the Software, and to permit persons to whom the Software is |
| # furnished to do so, subject to the following conditions: |
| # |
| # The above copyright notice and this permission notice shall be included in |
| # all copies or substantial portions of the Software. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| # THE SOFTWARE. |
| """ |
| |
| from __future__ import absolute_import, division, print_function |
| import argparse |
| import fnmatch |
| import re |
| import shutil |
| import sys |
| import os |
| |
| from . import constants |
| from .cuda_to_hip_mappings import CUDA_TO_HIP_MAPPINGS |
| from .cuda_to_hip_mappings import MATH_TRANSPILATIONS |
| |
| # PYTORCH_TEMPLATE_MAP provides the mapping from PyTorch kernel template
| # types to their actual types.
| PYTORCH_TEMPLATE_MAP = {"Dtype": "scalar_t", "T": "scalar_t"} |
| CAFFE2_TEMPLATE_MAP = {} |
| |
| |
| class InputError(Exception): |
| """Exception raised for errors in the input."""
| |
| def __init__(self, message): |
| super(InputError, self).__init__(message) |
| self.message = message |
| |
| def __str__(self): |
| return "{}: {}".format("Input error", self.message) |
| |
| |
| def openf(filename, mode): |
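| """Open filename; on Python 3, decoding errors are ignored so that sources
| containing stray non-UTF-8 bytes can still be processed."""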
| if sys.version_info[0] == 3: |
| return open(filename, mode, errors='ignore') |
| else: |
| return open(filename, mode) |
| |
| |
| # Color coding for printing |
| class bcolors: |
| HEADER = '\033[95m' |
| OKBLUE = '\033[94m' |
| OKGREEN = '\033[92m' |
| WARNING = '\033[93m' |
| FAIL = '\033[91m' |
| ENDC = '\033[0m' |
| BOLD = '\033[1m' |
| UNDERLINE = '\033[4m' |
| |
| |
| def matched_files_iter(root_path, includes=('*',), ignores=(), extensions=(), out_of_place_only=False): |
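| """Yield paths, relative to root_path, of files that match the include
| patterns and extensions, are not ignored, and are relevant for HIPification
| (PyTorch files or Caffe2 GPU files)."""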
| def _fnmatch(filepath, patterns): |
| return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns) |
| |
| def match_extensions(filename): |
| """Helper method to see if filename ends with certain extension""" |
| return any(filename.endswith(e) for e in extensions) |
| |
| exact_matches = set(includes) |
| |
| # This is a very rough heuristic; really, we want to avoid scanning |
| # any file which is not checked into source control, but this script |
| # needs to work even if you're in a Git or Hg checkout, so easier to |
| # just blacklist the biggest time sinks that won't matter in the |
| # end. |
| for (abs_dirpath, dirs, filenames) in os.walk(root_path, topdown=True): |
| rel_dirpath = os.path.relpath(abs_dirpath, root_path) |
| if rel_dirpath == '.': |
| # Prune directories we never want to scan (only checked at the root).
| if ".git" in dirs: |
| dirs.remove(".git") |
| if "build" in dirs: |
| dirs.remove("build") |
| if "third_party" in dirs: |
| dirs.remove("third_party") |
| for filename in filenames: |
| filepath = os.path.join(rel_dirpath, filename) |
| # We respect extensions, UNLESS you wrote the entire |
| # filename verbatim, in which case we always accept it |
| if ( |
| _fnmatch(filepath, includes) |
| and (not _fnmatch(filepath, ignores)) |
| and (match_extensions(filepath) or filepath in exact_matches) |
| ): |
| if not is_pytorch_file(filepath) and not is_caffe2_gpu_file(filepath): |
| continue |
| if out_of_place_only and not is_out_of_place(filepath): |
| continue |
| yield filepath |
| |
| |
| def preprocess( |
| output_directory, |
| all_files, |
| show_detailed=False, |
| show_progress=True, |
| hip_clang_launch=False): |
| """ |
| Call preprocessor on selected files. |
| |
| Arguments:
| show_detailed - Show a detailed summary of the transpilation process. |
| """ |
| |
| # Preprocessing statistics. |
| stats = {"unsupported_calls": [], "kernel_launches": []} |
| |
| for filepath in all_files: |
| result = preprocessor(output_directory, filepath, stats, hip_clang_launch) |
| # Show what happened |
| if show_progress: |
| print( |
| filepath, "->", |
| get_hip_file_path(filepath), result) |
| |
| print(bcolors.OKGREEN + "Successfully preprocessed all matching files." + bcolors.ENDC, file=sys.stderr) |
| |
| # Show detailed summary |
| if show_detailed: |
| compute_stats(stats) |
| |
| |
| def compute_stats(stats): |
| unsupported_calls = {cuda_call for (cuda_call, _filepath) in stats["unsupported_calls"]} |
| |
| # Print the number of unsupported calls |
| print("Total number of unsupported CUDA function calls: {0:d}".format(len(unsupported_calls))) |
| |
| # Print the list of unsupported calls |
| print(", ".join(unsupported_calls)) |
| |
| # Print the number of kernel launches |
| print("\nTotal number of replaced kernel launches: {0:d}".format(len(stats["kernel_launches"]))) |
| |
| |
| def add_dim3(kernel_string, cuda_kernel): |
| '''adds dim3() to the second and third arguments in the kernel launch''' |
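| # Illustrative example (kernel and argument names assumed): for the launch
| #   kernel<<<grid, block, 0, stream>>>(arg)
| # kernel_string is "<<<grid, block, 0, stream>>>" and cuda_kernel is the text
| # up to and including the opening "(", which becomes
| #   kernel<<<dim3(grid), dim3(block), 0, stream>>>(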
| count = 0 |
| closure = 0 |
| kernel_string = kernel_string.replace("<<<", "").replace(">>>", "") |
| arg_locs = [{} for _ in range(2)] |
| arg_locs[count]['start'] = 0 |
| for ind, c in enumerate(kernel_string): |
| if count > 1: |
| break |
| if c == "(": |
| closure += 1 |
| elif c == ")": |
| closure -= 1 |
| elif (c == "," or ind == len(kernel_string) - 1) and closure == 0: |
| arg_locs[count]['end'] = ind + (c != ",") |
| count += 1 |
| if count < 2: |
| arg_locs[count]['start'] = ind + 1 |
| |
| first_arg_raw = kernel_string[arg_locs[0]['start']:arg_locs[0]['end'] + 1] |
| second_arg_raw = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']] |
| |
| first_arg_clean = kernel_string[arg_locs[0]['start']:arg_locs[0]['end']].replace("\n", "").strip(" ") |
| second_arg_clean = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']].replace("\n", "").strip(" ") |
| |
| first_arg_dim3 = "dim3({})".format(first_arg_clean) |
| second_arg_dim3 = "dim3({})".format(second_arg_clean) |
| |
| first_arg_raw_dim3 = first_arg_raw.replace(first_arg_clean, first_arg_dim3) |
| second_arg_raw_dim3 = second_arg_raw.replace(second_arg_clean, second_arg_dim3) |
| cuda_kernel = cuda_kernel.replace(first_arg_raw + second_arg_raw, first_arg_raw_dim3 + second_arg_raw_dim3) |
| return cuda_kernel |
| |
| |
| RE_KERNEL_LAUNCH = re.compile(r'([ ]+)(detail?)::[ ]+\\\n[ ]+') |
| |
| |
| def processKernelLaunches(string, stats): |
| """ Replace the CUDA style Kernel launches with the HIP style kernel launches.""" |
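| # Illustrative example (kernel and argument names assumed):
| #   my_kernel<<<grid, block, 0, stream>>>(arg1, arg2);
| # is rewritten to
| #   hipLaunchKernelGGL(my_kernel, dim3(grid), dim3(block), 0, stream, arg1, arg2);
| # Launches that pass fewer than four launch parameters are padded with ", 0".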
| # Concat the namespace with the kernel names. (Find cleaner way of doing this later). |
| string = RE_KERNEL_LAUNCH.sub(lambda inp: "{0}{1}::".format(inp.group(1), inp.group(2)), string) |
| |
| def grab_method_and_template(in_kernel): |
| # The positions for relevant kernel components. |
| pos = { |
| "kernel_launch": {"start": in_kernel["start"], "end": in_kernel["end"]}, |
| "kernel_name": {"start": -1, "end": -1}, |
| "template": {"start": -1, "end": -1} |
| } |
| |
| # Count for balancing template |
| count = {"<>": 0} |
| |
| # Status for whether we are parsing a certain item. |
| START = 0 |
| AT_TEMPLATE = 1 |
| AFTER_TEMPLATE = 2 |
| AT_KERNEL_NAME = 3 |
| |
| status = START |
| |
| # Parse the string character by character |
| for i in range(pos["kernel_launch"]["start"] - 1, -1, -1): |
| char = string[i] |
| |
| # Handle Templating Arguments |
| if status == START or status == AT_TEMPLATE: |
| if char == ">": |
| if status == START: |
| status = AT_TEMPLATE |
| pos["template"]["end"] = i |
| count["<>"] += 1 |
| |
| if char == "<": |
| count["<>"] -= 1 |
| if count["<>"] == 0 and (status == AT_TEMPLATE): |
| pos["template"]["start"] = i |
| status = AFTER_TEMPLATE |
| |
| # Handle Kernel Name |
| if status != AT_TEMPLATE: |
| if string[i].isalnum() or string[i] in {'(', ')', '_', ':', '#'}: |
| if status != AT_KERNEL_NAME: |
| status = AT_KERNEL_NAME |
| pos["kernel_name"]["end"] = i |
| |
| # Case: Kernel name starts the string. |
| if i == 0: |
| pos["kernel_name"]["start"] = 0 |
| |
| # Finished |
| return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])] |
| |
| else: |
| # Potential ending point if we're already traversing a kernel's name. |
| if status == AT_KERNEL_NAME: |
| pos["kernel_name"]["start"] = i |
| |
| # Finished |
| return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])] |
| |
| def find_kernel_bounds(string): |
| """Finds the starting and ending points for all kernel launches in the string.""" |
| kernel_end = 0 |
| kernel_positions = [] |
| |
| # Continue until we cannot find any more kernels.
| while string.find("<<<", kernel_end) != -1: |
| # Get kernel starting position (starting from the previous ending point) |
| kernel_start = string.find("<<<", kernel_end) |
| |
| # Get kernel ending position (adjust end point past the >>>)
| kernel_end = string.find(">>>", kernel_start) + 3
| # string.find returns -1 when ">>>" is missing, which would leave kernel_end at 2.
| if kernel_end < 3:
| raise InputError("no kernel end found")
| |
| # Add to list of traversed kernels |
| kernel_positions.append({"start": kernel_start, "end": kernel_end, |
| "group": string[kernel_start: kernel_end]}) |
| |
| return kernel_positions |
| |
| # Grab positional ranges of all kernel launches
| get_kernel_positions = find_kernel_bounds(string)
| output_string = string |
| |
| # Replace each CUDA kernel with a HIP kernel. |
| for kernel in get_kernel_positions: |
| # Get kernel components |
| params = grab_method_and_template(kernel) |
| |
| # Find parenthesis after kernel launch |
| parenthesis = string.find("(", kernel["end"]) |
| |
| # Extract cuda kernel |
| cuda_kernel = string[params[0]["start"]:parenthesis + 1] |
| kernel_string = string[kernel['start']:kernel['end']] |
| cuda_kernel_dim3 = add_dim3(kernel_string, cuda_kernel) |
| # Keep number of kernel launch params consistent (grid dims, group dims, stream, dynamic shared size) |
| num_klp = len(extract_arguments(0, kernel["group"].replace("<<<", "(").replace(">>>", ")"))) |
| |
| hip_kernel = "hipLaunchKernelGGL(" + cuda_kernel_dim3[0:-1].replace( |
| ">>>", ", 0" * (4 - num_klp) + ">>>").replace("<<<", ", ").replace(">>>", ", ") |
| |
| # Replace cuda kernel with hip kernel |
| output_string = output_string.replace(cuda_kernel, hip_kernel) |
| |
| # Update the statistics |
| stats["kernel_launches"].append(hip_kernel) |
| |
| return output_string |
| |
| |
| def find_closure_group(input_string, start, group): |
| """Generalization for finding a balancing closure group |
| |
| if group = ["(", ")"], then finds the first balanced parentheses.
| if group = ["{", "}"], then finds the first balanced braces.
| |
| Given an input string, a starting position in the input string, and the group type, |
| find_closure_group returns the positions of group[0] and group[1] as a tuple. |
| |
| Example: |
| find_closure_group("(hi)", 0, ["(", ")"]) |
| |
| Returns: |
| 0, 3 |
| """ |
| |
| inside_parenthesis = False |
| parens = 0 |
| pos = start |
| p_start, p_end = -1, -1 |
| |
| while pos < len(input_string): |
| if input_string[pos] == group[0]: |
| if inside_parenthesis is False: |
| inside_parenthesis = True |
| parens = 1 |
| p_start = pos |
| else: |
| parens += 1 |
| elif input_string[pos] == group[1] and inside_parenthesis: |
| parens -= 1 |
| |
| if parens == 0: |
| p_end = pos |
| return p_start, p_end |
| |
| pos += 1 |
| return None, None |
| |
| |
| def find_bracket_group(input_string, start): |
| """Finds the first balanced brace group."""
| return find_closure_group(input_string, start, group=["{", "}"]) |
| |
| |
| def find_parentheses_group(input_string, start): |
| """Finds the first balanced parentheses."""
| return find_closure_group(input_string, start, group=["(", ")"]) |
| |
| |
| RE_ASSERT = re.compile(r"\bassert[ ]*\(") |
| |
| |
| def replace_math_functions(input_string): |
| """FIXME: Temporarily replace std:: invocations of math functions
| with non-std:: versions to prevent linker errors.
| NOTE: This can lead to correctness issues when running tests, since the
| correct version of the math function (exp/expf) might not get
| called. The plan is to remove this function once HIP supports
| std:: math function calls inside device code.
| """
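| # Illustrative example (the exact entries live in MATH_TRANSPILATIONS): an
| # occurrence such as "std::exp(" would be rewritten to its non-std::
| # counterpart, e.g. "::exp(".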
| output_string = input_string |
| for func in MATH_TRANSPILATIONS: |
| output_string = output_string.replace(r'{}('.format(func), '{}('.format(MATH_TRANSPILATIONS[func])) |
| |
| return output_string |
| |
| |
| RE_SYNCTHREADS = re.compile(r"[:]?[:]?\b(__syncthreads)\b(\w*\()") |
| |
| |
| def hip_header_magic(input_string): |
| """If the file makes kernel builtin calls and does not include the cuda_runtime.h header, |
| then automatically add an #include to match the "magic" includes provided by NVCC. |
| TODO: |
| Update logic to ignore cases where the cuda_runtime.h is included by another file. |
| """ |
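| # Illustrative example: a file that uses __global__ (or __shared__,
| # __syncthreads, hipLaunchKernelGGL) but includes neither hip/hip_runtime.h
| # nor hip/hip_runtime_api.h gets '#include "hip/hip_runtime.h"' prepended.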
| |
| # Copy the input. |
| output_string = input_string |
| |
| # Check if one of the following headers is already included. |
| headers = ["hip/hip_runtime.h", "hip/hip_runtime_api.h"] |
| if any(re.search(r'#include ("{0}"|<{0}>)'.format(ext), output_string) for ext in headers): |
| return output_string |
| |
| # Rough logic to detect if we're inside device code
| hasDeviceLogic = ("hipLaunchKernelGGL" in output_string
| or "__global__" in output_string
| or "__shared__" in output_string
| or RE_SYNCTHREADS.search(output_string) is not None)
| |
| # If device logic found, provide the necessary header. |
| if hasDeviceLogic: |
| output_string = '#include "hip/hip_runtime.h"\n' + input_string |
| |
| return output_string |
| |
| |
| RE_EXTERN_SHARED = re.compile(r"extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;") |
| |
| |
| def replace_extern_shared(input_string): |
| """Match extern __shared__ type foo[]; syntax and use HIP_DYNAMIC_SHARED() MACRO instead. |
| https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md#__shared__ |
| Example: |
| "extern __shared__ char smemChar[];" => "HIP_DYNAMIC_SHARED( char, smemChar)" |
| "extern __shared__ unsigned char smem[];" => "HIP_DYNAMIC_SHARED( unsigned char, smem)"
| """ |
| output_string = input_string |
| output_string = RE_EXTERN_SHARED.sub( |
| lambda inp: "HIP_DYNAMIC_SHARED({0} {1}, {2})".format( |
| inp.group(1) or "", inp.group(2), inp.group(3)), output_string) |
| |
| return output_string |
| |
| |
| def get_hip_file_path(filepath): |
| """ |
| Returns the new name of the hipified file |
| """ |
| # At the moment, some files are HIPified in place. The predicate |
| # is_out_of_place tells us if this is the case or not. |
| if not is_out_of_place(filepath): |
| return filepath |
| |
| dirpath, filename = os.path.split(filepath) |
| root, ext = os.path.splitext(filename) |
| |
| # Here's the plan: |
| # |
| # In general, we need to disambiguate the HIPified filename so that |
| # it gets a different name from the original Caffe2 filename, so |
| # that we don't overwrite the original file. (Additionally, |
| # hcc historically had a bug where if you had two files with |
| # the same basename, they would clobber each other.) |
| # |
| # There's a lot of different naming conventions across PyTorch |
| # and Caffe2, but the general recipe is to convert occurrences |
| # of cuda/gpu to hip, and add hip if there are no occurrences |
| # of cuda/gpu anywhere. |
| # |
| # Concretely, we do the following: |
| # |
| # - If there is a directory component named "cuda", replace |
| # it with "hip", AND |
| # |
| # - If the file name contains "CUDA", replace it with "HIP", AND |
| # |
| # If NONE of the above occurred, then insert "hip" in the file path |
| # as the direct parent folder of the file |
| # |
| # Furthermore, ALWAYS replace '.cu' with '.hip', because those files |
| # contain CUDA kernels that need to be hipified and processed with
| # the hcc compiler
| # |
| # This isn't set in stone; we might adjust this to support other |
| # naming conventions. |
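| #
| # Illustrative examples (paths assumed):
| #   aten/src/ATen/native/cuda/SoftMax.cu  ->  aten/src/ATen/native/hip/SoftMax.hip
| #   caffe2/operators/relu_op.cu           ->  caffe2/operators/hip/relu_op.hip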
| |
| if ext == '.cu': |
| ext = '.hip' |
| |
| orig_dirpath = dirpath |
| |
| dirpath = dirpath.replace('cuda', 'hip') |
| dirpath = dirpath.replace('THC', 'THH') |
| |
| root = root.replace('cuda', 'hip') |
| root = root.replace('CUDA', 'HIP') |
| # Special case to handle caffe2/core/THCCachingAllocator |
| if dirpath != "caffe2/core": |
| root = root.replace('THC', 'THH') |
| |
| if dirpath == orig_dirpath: |
| dirpath = os.path.join(dirpath, 'hip') |
| |
| return os.path.join(dirpath, root + ext) |
| |
| |
| def is_out_of_place(filepath): |
| if filepath.startswith("torch/"): |
| return False |
| if filepath.startswith("tools/autograd/templates/"): |
| return False |
| return True |
| |
| |
| # Keep this synchronized with includes/ignores in build_amd.py |
| def is_pytorch_file(filepath): |
| if filepath.startswith("aten/"): |
| if filepath.startswith("aten/src/ATen/core/"): |
| return False |
| return True |
| if filepath.startswith("torch/"): |
| return True |
| if filepath.startswith("tools/autograd/templates/"): |
| return True |
| return False |
| |
| |
| def is_caffe2_gpu_file(filepath): |
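| """Returns True for files that look like Caffe2 GPU sources: anything under
| c10/cuda, plus files with "gpu" in the name or a .cu/.cuh extension,
| excluding cuDNN files. Illustrative examples (paths assumed):
| "caffe2/operators/relu_op.cu" -> True, "caffe2/operators/relu_op.cc" -> False.
| """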
| if filepath.startswith("c10/cuda"): |
| return True |
| filename = os.path.basename(filepath) |
| _, ext = os.path.splitext(filename) |
| return ('gpu' in filename or ext in ['.cu', '.cuh']) and ('cudnn' not in filename) |
| |
| |
| # Cribbed from https://stackoverflow.com/questions/42742810/speed-up-millions-of-regex-replacements-in-python-3/42789508#42789508 |
| class Trie(): |
| """Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern. |
| The corresponding Regex should match much faster than a simple Regex union.""" |
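| # Illustrative example (words assumed):
| #   t = Trie(); t.add("cudaMalloc"); t.add("cudaMemcpy")
| #   t.pattern()  ->  "cudaM(?:alloc|emcpy)"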
| |
| def __init__(self): |
| self.data = {} |
| |
| def add(self, word): |
| ref = self.data |
| for char in word: |
| ref[char] = char in ref and ref[char] or {} |
| ref = ref[char] |
| ref[''] = 1 |
| |
| def dump(self): |
| return self.data |
| |
| def quote(self, char): |
| return re.escape(char) |
| |
| def _pattern(self, pData): |
| data = pData |
| if "" in data and len(data.keys()) == 1: |
| return None |
| |
| alt = [] |
| cc = [] |
| q = 0 |
| for char in sorted(data.keys()): |
| if isinstance(data[char], dict): |
| try: |
| recurse = self._pattern(data[char]) |
| alt.append(self.quote(char) + recurse) |
| except Exception: |
| cc.append(self.quote(char)) |
| else: |
| q = 1 |
| cconly = not len(alt) > 0 |
| |
| if len(cc) > 0: |
| if len(cc) == 1: |
| alt.append(cc[0]) |
| else: |
| alt.append('[' + ''.join(cc) + ']') |
| |
| if len(alt) == 1: |
| result = alt[0] |
| else: |
| result = "(?:" + "|".join(alt) + ")" |
| |
| if q: |
| if cconly: |
| result += "?" |
| else: |
| result = "(?:%s)?" % result |
| return result |
| |
| def pattern(self): |
| return self._pattern(self.dump()) |
| |
| |
| CAFFE2_TRIE = Trie() |
| CAFFE2_MAP = {} |
| PYTORCH_TRIE = Trie() |
| PYTORCH_MAP = {} |
| for mapping in CUDA_TO_HIP_MAPPINGS: |
| for src, value in mapping.items(): |
| dst = value[0] |
| meta_data = value[1:] |
| if constants.API_CAFFE2 not in meta_data: |
| PYTORCH_TRIE.add(src) |
| PYTORCH_MAP[src] = dst |
| if constants.API_PYTORCH not in meta_data: |
| CAFFE2_TRIE.add(src) |
| CAFFE2_MAP[src] = dst |
| RE_CAFFE2_PREPROCESSOR = re.compile(CAFFE2_TRIE.pattern()) |
| RE_PYTORCH_PREPROCESSOR = re.compile(r'(?<=\W)({0})(?=\W)'.format(PYTORCH_TRIE.pattern())) |
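| # Illustrative example (the exact entries live in CUDA_TO_HIP_MAPPINGS): in a
| # PyTorch file, a line such as
| #   cudaError_t err = cudaGetLastError();
| # would be rewritten via RE_PYTORCH_PREPROCESSOR / PYTORCH_MAP to
| #   hipError_t err = hipGetLastError();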
| |
| RE_QUOTE_HEADER = re.compile(r'#include "([^"]+)"') |
| RE_ANGLE_HEADER = re.compile(r'#include <([^>]+)>') |
| RE_THC_GENERIC_FILE = re.compile(r'#define THC_GENERIC_FILE "([^"]+)"') |
| RE_CU_SUFFIX = re.compile(r'\.cu\b') # be careful not to pick up .cuh |
| |
|
| def preprocessor(output_directory, filepath, stats, hip_clang_launch):
| """ Executes the CUDA -> HIP conversion on the specified file. """ |
| fin_path = os.path.join(output_directory, filepath) |
| with open(fin_path, 'r') as fin: |
| output_source = fin.read() |
| |
| fout_path = os.path.join(output_directory, get_hip_file_path(filepath)) |
| if not os.path.exists(os.path.dirname(fout_path)): |
| os.makedirs(os.path.dirname(fout_path)) |
| |
| # unsupported_calls statistics reporting is broken atm |
| if is_pytorch_file(filepath): |
| def pt_repl(m): |
| return PYTORCH_MAP[m.group(0)] |
| output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source) |
| else: |
| def c2_repl(m): |
| return CAFFE2_MAP[m.group(0)] |
| output_source = RE_CAFFE2_PREPROCESSOR.sub(c2_repl, output_source) |
| |
| # Header rewrites |
| def mk_repl(templ): |
| def repl(m): |
| f = m.group(1) |
| if ( |
| f.startswith("ATen/cuda") |
| or f.startswith("ATen/native/cuda") |
| or f.startswith("ATen/native/quantized/cuda") |
| or f.startswith("ATen/native/sparse/cuda") |
| or f.startswith("THC/") |
| or f.startswith("THCUNN/") |
| or (f.startswith("THC") and not f.startswith("THCP")) |
| ): |
| return templ.format(get_hip_file_path(m.group(1))) |
| return m.group(0) |
| return repl |
| output_source = RE_QUOTE_HEADER.sub(mk_repl('#include "{0}"'), output_source) |
| output_source = RE_ANGLE_HEADER.sub(mk_repl('#include <{0}>'), output_source) |
| output_source = RE_THC_GENERIC_FILE.sub(mk_repl('#define THC_GENERIC_FILE "{0}"'), output_source) |
| |
| # CMakeLists.txt rewrites |
| if filepath.endswith('CMakeLists.txt'): |
| output_source = output_source.replace('CUDA', 'HIP') |
| output_source = output_source.replace('THC', 'THH') |
| output_source = RE_CU_SUFFIX.sub('.hip', output_source) |
| |
| # Perform Kernel Launch Replacements |
| if not hip_clang_launch: |
| output_source = processKernelLaunches(output_source, stats) |
| |
| # Replace std:: with non-std:: versions |
| if filepath.endswith(".cu") or filepath.endswith(".cuh"): |
| output_source = replace_math_functions(output_source) |
| |
| # Include header if device code is contained. |
| output_source = hip_header_magic(output_source) |
| |
| # Replace the extern __shared__ |
| output_source = replace_extern_shared(output_source) |
| |
| do_write = True |
| if os.path.exists(fout_path): |
| with open(fout_path, 'r') as fout_old: |
| do_write = fout_old.read() != output_source |
| if do_write: |
| with open(fout_path, 'w') as fout: |
| fout.write(output_source) |
| return "ok" |
| else: |
| return "skipped" |
| |
|
| def file_specific_replacement(filepath, search_string, replace_string, strict=False):
| with openf(filepath, "r+") as f: |
| contents = f.read() |
| if strict: |
| contents = re.sub(r'\b({0})\b'.format(re.escape(search_string)), lambda x: replace_string, contents) |
| else: |
| contents = contents.replace(search_string, replace_string) |
| f.seek(0) |
| f.write(contents) |
| f.truncate() |
| |
| |
| def file_add_header(filepath, header): |
| with openf(filepath, "r+") as f: |
| contents = f.read() |
| if header[0] != "<" and header[-1] != ">": |
| header = '"{0}"'.format(header) |
| contents = ('#include {0} \n'.format(header)) + contents |
| f.seek(0) |
| f.write(contents) |
| f.truncate() |
| |
| |
| def fix_static_global_kernels(in_txt): |
| """Static global kernels in HIP result in a compilation error."""
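| # e.g. " __global__ static void kernel(...)"  ->  "__global__ void kernel(...)"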
| in_txt = in_txt.replace(" __global__ static", "__global__") |
| return in_txt |
| |
| |
| RE_INCLUDE = re.compile(r"#include .*\n") |
| |
| |
| def extract_arguments(start, string): |
| """ Return the list of arguments in the upcoming function parameter closure. |
| Example: |
| string (input): '(blocks, threads, 0, THCState_getCurrentStream(state))' |
| arguments (output): |
| '[{'start': 1, 'end': 7}, |
| {'start': 8, 'end': 16}, |
| {'start': 17, 'end': 19}, |
| {'start': 20, 'end': 53}]' |
| """ |
| |
| arguments = [] |
| closures = { |
| "<": 0, |
| "(": 0 |
| } |
| current_position = start |
| argument_start_pos = current_position + 1 |
| |
| # Search for final parenthesis |
| while current_position < len(string): |
| if string[current_position] == "(": |
| closures["("] += 1 |
| elif string[current_position] == ")": |
| closures["("] -= 1 |
| elif string[current_position] == "<": |
| closures["<"] += 1 |
| elif string[current_position] == ">" and string[current_position - 1] != "-" and closures["<"] > 0: |
| closures["<"] -= 1 |
| |
| # Finished all arguments |
| if closures["("] == 0 and closures["<"] == 0: |
| # Add final argument |
| arguments.append({"start": argument_start_pos, "end": current_position}) |
| break |
| |
| # Finished current argument |
| if closures["("] == 1 and closures["<"] == 0 and string[current_position] == ",": |
| arguments.append({"start": argument_start_pos, "end": current_position}) |
| argument_start_pos = current_position + 1 |
| |
| current_position += 1 |
| |
| return arguments |
| |
| |
| def str2bool(v): |
| """ArgumentParser doesn't support type=bool. Thus, this helper method will convert |
| from possible string types to True / False.""" |
| if v.lower() in ('yes', 'true', 't', 'y', '1'): |
| return True |
| elif v.lower() in ('no', 'false', 'f', 'n', '0'): |
| return False |
| else: |
| raise argparse.ArgumentTypeError('Boolean value expected.') |
| |
| |
| def hipify( |
| project_directory, |
| show_detailed=False, |
| extensions=(".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"), |
| output_directory="", |
| includes=(), |
| out_of_place_only=False, |
| ignores=(), |
| show_progress=True, |
| hip_clang_launch=False, |
| ): |
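| """Top-level entry point: copy project_directory to output_directory
| (default: "<project_directory>_amd") and run the preprocessor over every
| matching file in the copy.
|
| Illustrative usage (module/package name assumed):
| from pyHIPIFY import hipify_python
| hipify_python.hipify(project_directory="/path/to/project", show_progress=False)
| """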
| if project_directory == "": |
| project_directory = os.getcwd() |
| |
| # Verify the project directory exists. |
| if not os.path.exists(project_directory): |
| print("The project folder specified does not exist.") |
| sys.exit(1) |
| |
| # If no output directory, provide a default one. |
| if not output_directory: |
| project_directory = project_directory.rstrip("/")
| output_directory = project_directory + "_amd" |
| |
| # Copy from project directory to output directory if not done already. |
| if not os.path.exists(output_directory): |
| shutil.copytree(project_directory, output_directory) |
| |
| all_files = list(matched_files_iter(output_directory, includes=includes, |
| ignores=ignores, extensions=extensions, |
| out_of_place_only=out_of_place_only)) |
| |
| # Start Preprocessor |
| preprocess( |
| output_directory, |
| all_files, |
| show_detailed=show_detailed, |
| show_progress=show_progress, |
| hip_clang_launch=hip_clang_launch) |