# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Utility for ICU4C code generation"""
| |
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import re
import site
import sys
import textwrap
from collections import deque

import jinja2
| |
# Directory that contains this script, with symlinks resolved.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))
# Four directory levels above THIS_DIR; used as the root for android_path().
ANDROID_TOP = os.path.realpath(os.path.join(THIS_DIR, '../../../..'))

# Shared Jinja environment. Templates are looked up in the 'jinja_templates'
# directory next to this file; block tags neither leave a trailing newline
# (trim_blocks) nor keep their leading indentation (lstrip_blocks).
JINJA_ENV = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.join(THIS_DIR, 'jinja_templates')),
    trim_blocks=True,
    lstrip_blocks=True,
)
| |
def generate_shim(functions, includes, suffix, template_file):
    """Render the shim library source for the given functions.

    Args:
        functions: Exposed to the template as 'functions'.
        includes: Exposed to the template as 'icu_headers'.
        suffix: Exposed to the template as 'suffix'.
        template_file: Name of the template to load through JINJA_ENV.

    Returns:
        The rendered template text.
    """
    template = JINJA_ENV.get_template(template_file)
    return template.render(
        functions=functions,
        icu_headers=includes,
        suffix=suffix,
    )
| |
def generate_symbol_txt(shim_functions, extra_function_names, template_file):
    """Render the symbol txt file for the given functions.

    Args:
        shim_functions: Exposed to the template as 'shim_functions'; each of
            these functions is given a suffix.
        extra_function_names: Exposed to the template as
            'extra_function_names'; each name is included as given.
        template_file: Name of the template to load through JINJA_ENV.

    Returns:
        The rendered template text.
    """
    template = JINJA_ENV.get_template(template_file)
    return template.render(
        shim_functions=shim_functions,
        extra_function_names=extra_function_names,
    )
| |
def get_allowlisted_apis(allowlist_file):
    """Return the set of API names listed in allowlist_file.

    The file is resolved relative to THIS_DIR. Every line is stripped of
    surrounding whitespace; empty lines and lines starting with '#' are
    ignored.
    """
    allowlist_path = os.path.join(THIS_DIR, allowlist_file)
    with open(allowlist_path, 'r') as allowlist:
        entries = (raw_line.strip() for raw_line in allowlist)
        return {entry for entry in entries
                if entry and not entry.startswith("#")}
| |
def android_path(*args):
    """Join the given path components onto ANDROID_TOP and return the result."""
    components = (ANDROID_TOP,) + args
    return os.path.join(*components)
| |
| |
def _clang_version_sort_key(dir_name):
    """Return a sort key that orders embedded digit runs numerically.

    re.split() with a capturing group alternates text and digit tokens, so odd
    positions always hold digit runs. Converting those to int makes
    'clang-r10' sort after 'clang-r9' while keeping the keys mutually
    comparable (str is only ever compared with str, int with int).
    """
    tokens = re.split(r'([0-9]+)', dir_name)
    return [int(token) if position % 2 else token
            for position, token in enumerate(tokens)]


def get_clang_path(base_path=None):
    """Find the latest clang version and return the full path.

    Args:
        base_path: Directory holding the 'clang-r*' toolchain directories.
            Defaults to prebuilts/clang/host/linux-x86/ in the Android tree.

    Returns:
        base_path joined with the name of the highest-versioned 'clang-r*'
        entry. Digit runs in the names are compared numerically, so the
        result does not depend on all versions having the same digit count.

    Raises:
        RuntimeError: If base_path contains no entry starting with 'clang-r'.
    """
    if base_path is None:
        base_path = android_path('prebuilts/clang/host/linux-x86/')
    candidates = [f for f in os.listdir(base_path) if f.startswith('clang-r')]
    if not candidates:
        raise RuntimeError('No clang-r* directory found in %s' % base_path)
    selected = max(candidates, key=_clang_version_sort_key)
    print("Using clang version %s" % selected)
    return os.path.join(base_path, selected)
| |
| |
def get_clang_lib_path(clang_path):
    """Return the path of a 'libclang.so.*' file under <clang_path>/lib64.

    The first matching entry reported by os.listdir() is used.
    """
    lib_dir = os.path.join(clang_path, 'lib64')
    matches = list(filter(lambda entry: entry.startswith('libclang.so.'),
                          os.listdir(lib_dir)))
    return os.path.join(lib_dir, matches[0])
| |
| |
def get_clang_header_dir(clang_path):
    """Return the 'include/' directory of clang's bundled headers.

    The headers are looked up under <clang_path>/lib64/clang/<entry>/, where
    <entry> is the first name reported by os.listdir().
    """
    versions_dir = os.path.join(clang_path, 'lib64/clang/')
    first_entry = os.listdir(versions_dir)[0]
    return os.path.join(versions_dir, first_entry, 'include/')
| |
| |
# Locate the prebuilt clang toolchain once, at import time.
CLANG_PATH = get_clang_path()
CLANG_LIB_PATH = get_clang_lib_path(CLANG_PATH)
CLANG_HEADER_PATH = get_clang_header_dir(CLANG_PATH)

# The toolchain's site-packages directory must be registered before the
# import below, which is why the import cannot live at the top of the file.
site.addsitedir(
    os.path.join(CLANG_PATH, 'lib64/python3/site-packages/'))
import clang.cindex  # pylint: disable=import-error,wrong-import-position
| |
| |
class Function:
    """Describes one visible function declared in an ICU header.

    Attributes set by the constructor:
        name, result_type, params, is_variadic: stored as given; params is a
            sequence of (type spelling, parameter name) pairs.
        va_list_insert_position: index at which 'args' is inserted into the
            argument list of a variadic function; -1 until assigned.
        callee: symbol name to look up with dlsym; starts out equal to name.
        last_param: name of the last named parameter (variadic functions only).
        handle: 'handle_' followed by the module name.
        return_void: True when result_type is exactly 'void'.
    """

    def __init__(self, name, result_type, params, is_variadic, module):
        self.name = name
        self.result_type = result_type
        self.params = params
        self.is_variadic = is_variadic
        self.va_list_insert_position = -1

        # The callee may be shared with other functions once a variadic
        # function is redirected to its va_list counterpart.
        self.callee = name
        if is_variadic:
            self.last_param = params[-1][1]
        self.handle = 'handle_' + module
        self.return_void = result_type == 'void'

    @property
    def param_str(self):
        """Returns a string usable as a parameter list in a function decl."""
        declarations = []
        for c_type, c_name in self.params:
            bracket = c_type.find('[')
            if bracket != -1:
                # An array parameter such as `int foo[42]` arrives as type
                # `int [42]` and name `foo`; move the extent behind the name.
                c_name = c_name + c_type[bracket:]
                c_type = c_type[:bracket]
            declarations.append('{} {}'.format(c_type, c_name))
        if self.is_variadic:
            declarations.append('...')
        return ', '.join(declarations)

    @property
    def arg_str(self):
        """Returns a string usable as an argument list in a function call.

        Raises:
            ValueError: If the function is variadic but no va_list insert
                position has been set.
        """
        names = [param_name for _, param_name in self.params]
        if self.is_variadic:
            if self.va_list_insert_position < 0:
                message = '{}({}) is variadic, but has no valid inserted position'.format(
                    self.name,
                    self.param_str)
                raise ValueError(textwrap.dedent(message))
            names.insert(self.va_list_insert_position, 'args')
        return ', '.join(names)

    def set_variadic_callee(self, callee, inserted_position):
        """Set the callee name and va_list position; no-op unless variadic."""
        if not self.is_variadic:
            return
        self.callee = callee
        self.va_list_insert_position = inserted_position
| |
| |
def logger():
    """Return the logger registered under this module's name."""
    module_logger = logging.getLogger(__name__)
    return module_logger
| |
| |
class DeclaredFunctionsParser:
    """Parser to get declared functions from ICU4C headers.

    Typical use: construct with the filters, optionally call
    set_va_functions_mapping() / set_ignored_include_dependency(), call
    parse(), then read the result properties.
    """

    def __init__(self, decl_filters, allowlisted_decl_filter):
        """
        Args:
            decl_filters: A list of filters for declared functions. A function
                is kept only if every filter returns a true value for it.
            allowlisted_decl_filter: A list of allowlisting filters for declared
                functions. If a function is allowlisted here, it will not be
                filtered by the filters in decl_filters.
        """
        self.decl_filters = decl_filters
        self.allowlisted_decl_filters = allowlisted_decl_filter
        self.va_functions_mapping = {}
        self.ignored_include_dependency = {}

        # properties to store the parsing result
        self.all_headers = []
        self.all_header_paths_to_copy = set()
        self.all_declared_functions = []
        self.seen_functions = set()
        self.all_header_to_function_names = {}

        # Point the Python bindings at the prebuilt libclang. The bindings
        # refuse to change the library file once libclang has been loaded, so
        # only set it while that is still allowed; this keeps a second parser
        # instance constructible.
        if not getattr(clang.cindex.Config, 'loaded', False):
            clang.cindex.Config.set_library_file(CLANG_LIB_PATH)

    def set_va_functions_mapping(self, mapping):
        """Set mapping from a variable argument function to an implementation.

        Functions w/ variable argument lists (...) need special care to call
        their corresponding v- versions that accept a va_list argument. Note that
        although '...' will always appear as the last parameter, its v- version
        may put the va_list arg in a different place. Hence we provide an index
        to indicate the position.
        e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
        'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
        inserted at zero-based index 3 of the argument list."""
        self.va_functions_mapping = mapping

    def set_ignored_include_dependency(self, mapping):
        """
        A sample mapping is { "ulocdata.h" : [ "uloc.h", "ures.h" ] }.
        The include dependencies will explicitly be ignored when producing header_paths_to_copy.
        """
        self.ignored_include_dependency = mapping

    @property
    def header_includes(self):
        """Return all headers declaring the functions returned in declared_functions.

        If all functions in the header are filtered, the header is not included in here."""
        return [DeclaredFunctionsParser.short_header_path(header) for header in self.all_headers]

    @property
    def header_paths_to_copy(self):
        """Return all headers needed to be copied"""
        return self.all_header_paths_to_copy

    @property
    def declared_functions(self):
        """Return all declared functions after filtering"""
        return self.all_declared_functions

    @property
    def header_to_function_names(self):
        """Return the mapping from the header file name to a list of function names in the file"""
        return self.all_header_to_function_names

    @staticmethod
    def get_cflags():
        """Returns the cflags that should be used for parsing."""
        clang_flags = [
            '-x',
            'c',
            '-std=c99',
            '-DU_DISABLE_RENAMING=1',
            '-DU_SHOW_CPLUSPLUS_API=0',
            '-DU_HIDE_DRAFT_API',
            '-DU_HIDE_DEPRECATED_API',
            '-DU_HIDE_INTERNAL_API',
            '-DANDROID_LINK_SHARED_ICU4C',
        ]

        include_dirs = [
            CLANG_HEADER_PATH,
            android_path('bionic/libc/include'),
            android_path('external/icu/android_icu4c/include'),
            android_path('external/icu/icu4c/source/common'),
            android_path('external/icu/icu4c/source/i18n'),
        ]

        clang_flags.extend('-I' + include_dir for include_dir in include_dirs)
        return clang_flags

    @staticmethod
    def get_all_cpp_headers():
        """Return all C++ header names in external/icu/tools/icu4c_srcgen/cxxfiles.txt.

        Blank lines and lines starting with '#' are skipped."""
        cpp_headers = []
        with open(android_path('external/icu/tools/icu4c_srcgen/cxxfiles.txt'), 'r') as file:
            for line in file:
                line = line.strip()
                if line and not line.startswith("#"):
                    cpp_headers.append(line)
        return cpp_headers

    def parse(self):
        """Parse the headers and collect the declared functions after filtering
        and the headers containing the functions."""
        index = clang.cindex.Index.create()
        # Both values are the same for every header, so compute them once
        # instead of re-reading cxxfiles.txt for each file.
        cpp_headers = frozenset(DeclaredFunctionsParser.get_all_cpp_headers())
        cflags = DeclaredFunctionsParser.get_cflags()

        icu_modules = (
            'common',
            'i18n',
        )
        header_dependencies = {}
        for module in icu_modules:
            path = android_path('external/icu/icu4c/source', module, 'unicode')
            # os.listdir() order is arbitrary; sort so that the insertion order
            # of the result containers is reproducible.
            files = [os.path.join(path, f)
                     for f in sorted(os.listdir(path)) if f.endswith('.h')]

            for file_path in files:
                base_header_name = os.path.basename(file_path)
                # Ignore C++ headers.
                if base_header_name in cpp_headers:
                    continue

                tunit = index.parse(file_path, cflags)
                DeclaredFunctionsParser.handle_diagnostics(tunit)
                header_dependencies[file_path] = [file_inclusion.include.name for file_inclusion
                                                  in tunit.get_includes()]
                visible_functions = self.get_visible_functions(
                    tunit.cursor, module, file_path)
                self.all_header_to_function_names[base_header_name] = \
                    [f.name for f in visible_functions]
                for function in visible_functions:
                    self.seen_functions.add(function.name)
                    self.all_declared_functions.append(function)
                if visible_functions:
                    self.all_headers.append(file_path)

        # Sort to produce a deterministic output
        self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name)
        self.all_headers = sorted(self.all_headers)

        self._collect_header_paths_to_copy(header_dependencies)

    def _collect_header_paths_to_copy(self, header_dependencies):
        """Add all_headers and their transitive ICU4C includes to all_header_paths_to_copy.

        header_dependencies is a map from icu4c header file path to a list of included headers.
        The key must be a ICU4C header, but the value could contain non-ICU4C headers, e.g.
        {
          ".../icu4c/source/common/unicode/utype.h": [
            ".../icu4c/source/common/unicode/uversion.h",
            ".../bionic/libc/include/ctype.h",
          ],
          ...
        }
        Includes listed in ignored_include_dependency for a header are not followed.
        """
        file_queue = deque()
        file_processed = set()
        for header in self.all_headers:
            file_queue.appendleft(header)
            self.all_header_paths_to_copy.add(header)
        while file_queue:
            file_path = file_queue.pop()
            if file_path in file_processed:
                continue
            file_processed.add(file_path)
            ignored = self.ignored_include_dependency.get(os.path.basename(file_path), ())
            for header in header_dependencies[file_path]:
                # Skip this header if this dependency is explicitly ignored
                if os.path.basename(header) in ignored:
                    continue
                if header in header_dependencies:  # Do not care non-icu4c headers
                    self.all_header_paths_to_copy.add(header)
                    file_queue.appendleft(header)

    @staticmethod
    def handle_diagnostics(tunit):
        """Logs compiler diagnostics via the module logger. Exits if errors occurred."""
        severity_to_level = {
            clang.cindex.Diagnostic.Fatal: logging.CRITICAL,
            clang.cindex.Diagnostic.Error: logging.ERROR,
            clang.cindex.Diagnostic.Warning: logging.WARNING,
            clang.cindex.Diagnostic.Note: logging.INFO,
        }
        errors = 0
        for diag in tunit.diagnostics:
            # Any other severity is logged at DEBUG instead of reusing a stale
            # (or unbound) level from a previous iteration.
            level = severity_to_level.get(diag.severity, logging.DEBUG)
            if level >= logging.ERROR:
                errors += 1
            logger().log(
                level, '%s:%s:%s %s', diag.location.file, diag.location.line,
                diag.location.column, diag.spelling)
        if errors:
            sys.exit('Errors occurred during parsing. Exiting.')

    def get_visible_functions(self, cursor, module, file_name):
        """Returns a list of all visible functions in a header file."""
        return [self.from_cursor(child, module)
                for child in cursor.get_children()
                if self.should_process_decl(child, file_name)]

    def should_process_decl(self, decl, file_name):
        """Returns True if this function needs to be processed.

        A decl is processed when it is a function declaration located in
        file_name, has not been seen before, has default visibility, and is
        either allowlisted or accepted by every filter in decl_filters.
        """
        if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            return False
        decl_file = decl.location.file
        # A declaration without a source file cannot belong to this header.
        if decl_file is None or decl_file.name != file_name:
            return False
        if decl.spelling in self.seen_functions:
            return False
        if not DeclaredFunctionsParser.is_function_visible(decl):
            return False
        if any(allow(decl) for allow in self.allowlisted_decl_filters):
            return True
        return all(decl_filter(decl) for decl_filter in self.decl_filters)

    @staticmethod
    def is_function_visible(decl):
        """Returns True if the function has default visibility.

        When several visibility attributes are present, the last one wins."""
        visible = False
        vis_attrs = DeclaredFunctionsParser.get_children_by_kind(
            decl, clang.cindex.CursorKind.VISIBILITY_ATTR)
        for child in vis_attrs:
            visible = child.spelling == 'default'
        return visible

    @staticmethod
    def get_children_by_kind(cursor, kind):
        """Returns a generator of cursor's children of a specific kind."""
        for child in cursor.get_children():
            if child.kind == kind:
                yield child

    @staticmethod
    def short_header_path(name):
        """Trim the given file name to 'unicode/xyz.h'.

        The name is returned unchanged if it does not contain 'unicode/'."""
        start = name.rfind('unicode/')
        if start < 0:
            return name
        return name[start:]

    def from_cursor(self, cursor, module):
        """Creates a Function object from the decl at the cursor.

        Raises:
            ValueError: If the cursor's type is not a function prototype.
        """
        if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
            raise ValueError(
                "{}'s type kind is {}, expected TypeKind.FUNCTIONPROTO.\n"
                "{} Line {} Column {}".format(
                    cursor.spelling,
                    cursor.type.kind,
                    cursor.location.file,
                    cursor.location.line,
                    cursor.location.column))

        name = cursor.spelling
        result_type = cursor.result_type.spelling
        is_variadic = cursor.type.is_function_variadic()
        params = [(arg.type.spelling, arg.spelling) for arg in cursor.get_arguments()]
        function = Function(name, result_type, params, is_variadic, module)
        # For variadic function, set the callee and va_list position
        if function.is_variadic and function.name in self.va_functions_mapping:
            va_func = self.va_functions_mapping[function.name]
            function.set_variadic_callee(va_func[0], va_func[1])
        return function
| |
| |
class StableDeclarationFilter:
    """Filter that accepts only declarations documented as @stable API."""

    def __call__(self, decl):
        """Returns True if decl has a raw comment containing '@stable'."""
        comment = decl.raw_comment
        return bool(comment) and '@stable' in comment
| |
| |
class AllowlistedDeclarationFilter:
    """Filter that accepts declarations whose name is in an allowlist."""

    def __init__(self, allowlisted_function_names):
        self.allowlisted_function_names = allowlisted_function_names

    def __call__(self, decl):
        """Returns True if the spelling of decl is in the allowlist."""
        function_name = decl.spelling
        return function_name in self.allowlisted_function_names
| |
| |
class BlocklistedlistedDeclarationFilter:
    """Filter that rejects declarations whose name is in a blocklist."""

    def __init__(self, blocklisted_function_names):
        self.blocklisted_function_names = blocklisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is not blocklisted"""
        function_name = decl.spelling
        return not function_name in self.blocklisted_function_names
| |
| |
# Functions with variable argument lists (...) cannot be forwarded directly;
# their wrappers call the corresponding v- version, which accepts a va_list
# argument instead. Although '...' is always the last parameter, the v-
# version may take the va_list somewhere else in its parameter list, so each
# entry records where it goes.
#
# Each entry maps a variadic function name to (v- version name, index). The
# index is the zero-based position at which the va_list (named `args`) is
# inserted into the argument list: 'umsg_format': ('umsg_vformat', 3) means
# the wrapper of 'umsg_format' calls 'umsg_vformat' with `args` inserted at
# index 3 of its arguments.
KNOWN_VA_FUNCTIONS = {
    'u_formatMessage': ('u_vformatMessage', 5),
    'u_parseMessage': ('u_vparseMessage', 5),
    'u_formatMessageWithError': ('u_vformatMessageWithError', 6),
    'u_parseMessageWithError': ('u_vparseMessageWithError', 5),
    'umsg_format': ('umsg_vformat', 3),
    'umsg_parse': ('umsg_vparse', 4),
    'utrace_format': ('utrace_vformat', 4),
}
| |
# Functions that are allowlisted even though they are not @stable.
ALLOWLISTED_FUNCTION_NAMES = (
    # Helpers behind @stable macros; not intended to be called directly.
    'utf8_nextCharSafeBody',
    'utf8_appendCharSafeBody',
    'utf8_prevCharSafeBody',
    'utf8_back1SafeBody',
)