|  | # Copyright 2023 The Bazel Authors. All rights reserved. | 
|  | # | 
|  | # Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | # you may not use this file except in compliance with the License. | 
|  | # You may obtain a copy of the License at | 
|  | # | 
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | # | 
|  | # Unless required by applicable law or agreed to in writing, software | 
|  | # distributed under the License is distributed on an "AS IS" BASIS, | 
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | # See the License for the specific language governing permissions and | 
|  | # limitations under the License. | 
|  |  | 
|  | import argparse | 
|  | import json | 
|  | import pathlib | 
|  | import re | 
|  | import sys | 
|  | import zipfile | 
|  |  | 
|  |  | 
# Generator is the modules_mapping.json file generator.
#
# It opens wheel (.whl) archives, derives importable module names from the
# paths of the .py and .so files they contain, and writes a JSON object that
# maps each module name to the name of the wheel providing it.
class Generator:
    # Stream that error messages are printed to.
    stderr = None
    # Path of the JSON file written by run().
    output_file = None
    # Compiled regular expressions; a module matching any of them is skipped.
    excluded_patterns = None
    # Module name -> wheel name. The dict itself is created per instance in
    # __init__: a dict literal here would be shared by every Generator and
    # entries from one instance would leak into all the others.
    mapping = None

    # stderr: file-like object used for error output.
    # output_file: path the JSON mapping is written to.
    # excluded_patterns: iterable of regular expression strings.
    def __init__(self, stderr, output_file, excluded_patterns):
        self.stderr = stderr
        self.output_file = output_file
        self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
        self.mapping = {}

    # dig_wheel analyses the wheel .whl file determining the modules it provides
    # by looking at the directory structure.
    def dig_wheel(self, whl):
        with zipfile.ZipFile(whl, "r") as zip_file:
            for path in zip_file.namelist():
                # Metadata directories are skipped unless the entry lives in
                # a purelib/platlib tree.
                if is_metadata(path) and not data_has_purelib_or_platlib(path):
                    continue
                self.module_for_path(path, whl)

    # module_for_path records the module (and, for __init__.py files, the
    # package) that the archive member `path` of wheel `whl` provides.
    # Members that are not .py or .so files are ignored.
    def module_for_path(self, path, whl):
        ext = pathlib.Path(path).suffix
        if ext not in (".py", ".so"):
            return

        # Only a genuine "<name>.data/purelib/..." or "<name>.data/platlib/..."
        # layout has its two leading components stripped. A plain substring
        # test would also truncate ordinary packages whose path merely
        # contains "purelib" or "platlib".
        parts = path.split("/")
        if (
            len(parts) > 2
            and is_metadata(path)
            and parts[1].lower() in ("purelib", "platlib")
        ):
            root = "/".join(parts[2:])
        else:
            root = path

        wheel_name = get_wheel_name(whl)

        if root.endswith("/__init__.py"):
            # Note the '/' here means that the __init__.py is not in the
            # root of the wheel, therefore we can index the directory
            # where this file is as an importable package.
            module = root[: -len("/__init__.py")].replace("/", ".")
            if not self.is_excluded(module):
                self.mapping[module] = wheel_name

        # Always index the module file.
        if ext == ".so":
            # Also remove extra metadata that is embedded as part of
            # the file name as an extra extension.
            ext = "".join(pathlib.Path(root).suffixes)
        module = root[: -len(ext)].replace("/", ".")
        if not self.is_excluded(module):
            self.mapping[module] = wheel_name

    # is_excluded reports whether any of the excluded patterns is found
    # anywhere in the module name.
    def is_excluded(self, module):
        return any(pattern.search(module) for pattern in self.excluded_patterns)

    # run is the entrypoint for the generator.
    #
    # It indexes every wheel in `wheels` and writes the resulting mapping as
    # JSON to output_file. Returns 0 on success. If indexing a wheel raises
    # AssertionError, the error is printed to stderr and 1 is returned
    # without writing the output file.
    def run(self, wheels):
        for whl in wheels:
            try:
                self.dig_wheel(whl)
            except AssertionError as error:
                # NOTE(review): only AssertionError is handled here; any other
                # exception raised while reading a wheel propagates to the
                # caller.
                print(error, file=self.stderr)
                return 1
        mapping_json = json.dumps(self.mapping)
        with open(self.output_file, "w") as f:
            f.write(mapping_json)
        return 0
|  |  | 
|  |  | 
# get_wheel_name returns the distribution name of a wheel, i.e. the part of
# the file name that precedes the first "-".
#
# Raises RuntimeError when the file does not have the ".whl" extension or its
# name contains no "-" separator. Without the second check a name such as
# "foo.whl" would be silently truncated to "foo.wh".
def get_wheel_name(path):
    pp = pathlib.PurePath(path)
    name, dash, _ = pp.name.partition("-")
    if pp.suffix != ".whl" or not dash:
        raise RuntimeError(
            "{} is not a valid wheel file name: the wheel doesn't follow ".format(
                pp.name
            )
            + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
        )
    return name
|  |  | 
|  |  | 
# is_metadata reports whether the first component of the path, compared
# case-insensitively, names a ".dist-info" or ".data" directory.
# Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
def is_metadata(path):
    first_component, _, _ = path.partition("/")
    return first_component.lower().endswith((".dist-info", ".data"))
|  |  | 
|  |  | 
# The .data is allowed to contain a full purelib or platlib directory
# These get unpacked into site-packages, so require indexing too.
# This is the same if "Root-Is-Purelib: true" is set and the files are at the root.
# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
#
# Returns True when the second path component is "purelib" or "platlib"
# (case-insensitive) and the first component is a metadata directory.
# A path without any "/" has no second component and yields False instead of
# raising IndexError.
def data_has_purelib_or_platlib(path):
    parts = path.split("/")
    if len(parts) < 2:
        return False
    # Check the second component first so that is_metadata is only consulted
    # for purelib/platlib candidates.
    return parts[1].lower() in ("purelib", "platlib") and is_metadata(path)
|  |  | 
|  |  | 
# Command-line entry point: parse the arguments, run the generator and use
# its return value as the process exit status.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="generator",
        description="Generates the modules mapping used by the Gazelle manifest.",
    )
    # Without an output path the generator would only fail once it tried to
    # open the file, so the flag is rejected up front when missing.
    parser.add_argument("--output_file", type=str, required=True)
    parser.add_argument("--exclude_patterns", nargs="+", default=[])
    parser.add_argument("--wheels", nargs="+", default=[])
    args = parser.parse_args()
    generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module and is not available when Python runs without it.
    sys.exit(generator.run(args.wheels))