feat(bzlmod): Cleaning up interpreter resolution (#1218)
This commit cleans up the use of "canonical resolution" of
the Python interpreter. When the python extension processes the
toolchain tags, it collects a list of the toolchain names and then
uses the hub_repo rule to create a map from those names to the
interpreter labels.
Next, we use the interpreter extension, which creates
repos that have symlinks pointing to the different interpreter
binaries.
The user can then pass in a label to the pip call for the
specific hermetic interpreter.
diff --git a/MODULE.bazel b/MODULE.bazel
index 92da402..e490beb 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -46,3 +46,6 @@
"pypi__coverage_cp39_x86_64-apple-darwin",
"pypi__coverage_cp39_x86_64-unknown-linux-gnu",
)
+
+python = use_extension("@rules_python//python:extensions.bzl", "python")
+use_repo(python, "pythons_hub")
diff --git a/examples/bzlmod_build_file_generation/MODULE.bazel b/examples/bzlmod_build_file_generation/MODULE.bazel
index 781b0cb..d59fbb3 100644
--- a/examples/bzlmod_build_file_generation/MODULE.bazel
+++ b/examples/bzlmod_build_file_generation/MODULE.bazel
@@ -48,9 +48,6 @@
# We also use the same name for python.host_python_interpreter.
PYTHON_NAME = "python3"
-# This is the name that is used for the host interpreter
-PYTHON_INTERPRETER = PYTHON_NAME + "_host_interpreter"
-
# We next initialize the python toolchain using the extension.
# You can set different Python versions in this block.
python.toolchain(
@@ -66,37 +63,46 @@
# into the scope of the current module.
# All of the python3 repositories use the PYTHON_NAME as there prefix. They
# are not catenated for ease of reading.
-use_repo(python, PYTHON_NAME)
-use_repo(python, "python3_toolchains")
-use_repo(python, PYTHON_INTERPRETER)
+use_repo(python, PYTHON_NAME, "python3_toolchains")
-# Register an already-defined toolchain so that Bazel can use it during toolchain resolution.
+# Register an already-defined toolchain so that Bazel can use it during
+# toolchain resolution.
register_toolchains(
"@python3_toolchains//:all",
)
-# Use the pip extension
-pip = use_extension("@rules_python//python:extensions.bzl", "pip")
+# The interpreter extension discovers the platform specific Python binary.
+# It creates a symlink to the binary, and we pass the label to the following
+# pip.parse call.
+interpreter = use_extension("@rules_python//python:interpreter_extension.bzl", "interpreter")
+interpreter.install(
+ name = "interpreter_python3",
+ python_name = PYTHON_NAME,
+)
+use_repo(interpreter, "interpreter_python3")
-# Use the extension to call the `pip_repository` rule that invokes `pip`, with `incremental` set.
-# Accepts a locked/compiled requirements file and installs the dependencies listed within.
+# Use the extension, pip.parse, to call the `pip_repository` rule that invokes
+# `pip`, with `incremental` set. The pip call accepts a locked/compiled
+# requirements file and installs the dependencies listed within.
# Those dependencies become available in a generated `requirements.bzl` file.
# You can instead check this `requirements.bzl` file into your repo.
# Because this project has different requirements for windows vs other
# operating systems, we have requirements for each.
+pip = use_extension("@rules_python//python:extensions.bzl", "pip")
pip.parse(
name = "pip",
# When using gazelle you must use set the following flag
# in order for the generation of gazelle dependency resolution.
incompatible_generate_aliases = True,
- # The interpreter attribute points to the interpreter to use for running
- # pip commands to download the packages in the requirements file.
+ # The interpreter_target attribute points to the interpreter to
+ # use for running pip commands to download the packages in the
+ # requirements file.
# As a best practice, we use the same interpreter as the toolchain
# that was configured above; this ensures the same Python version
# is used for both resolving dependencies and running tests/binaries.
# If this isn't specified, then you'll get whatever is locally installed
# on your system.
- python_interpreter_target = "@" + PYTHON_INTERPRETER + "//:python",
+ python_interpreter_target = "@interpreter_python3//:python",
requirements_lock = "//:requirements_lock.txt",
requirements_windows = "//:requirements_windows.txt",
)
diff --git a/python/extensions.bzl b/python/extensions.bzl
index 3bcbb50..ce11069 100644
--- a/python/extensions.bzl
+++ b/python/extensions.bzl
@@ -19,9 +19,10 @@
load("@rules_python//python/pip_install:repositories.bzl", "pip_install_dependencies")
load("@rules_python//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
load("@rules_python//python/private:coverage_deps.bzl", "install_coverage_deps")
-load("@rules_python//python/private:toolchains_repo.bzl", "get_host_os_arch", "get_host_platform")
+load("@rules_python//python/private:interpreter_hub.bzl", "hub_repo")
def _python_impl(module_ctx):
+ toolchains = []
for mod in module_ctx.modules:
for toolchain_attr in mod.tags.toolchain:
python_register_toolchains(
@@ -33,11 +34,16 @@
register_coverage_tool = toolchain_attr.configure_coverage_tool,
ignore_root_user_error = toolchain_attr.ignore_root_user_error,
)
- host_hub_name = toolchain_attr.name + "_host_interpreter"
- _host_hub(
- name = host_hub_name,
- user_repo_prefix = toolchain_attr.name,
- )
+
+ # We collect all of the toolchain names to create
+ # the INTERPRETER_LABELS map. This is used
+ # by interpreter_extension.bzl
+ toolchains.append(toolchain_attr.name)
+
+ hub_repo(
+ name = "pythons_hub",
+ toolchains = toolchains,
+ )
python = module_extension(
implementation = _python_impl,
@@ -133,89 +139,3 @@
"parse": tag_class(attrs = _pip_parse_ext_attrs()),
},
)
-
-# This function allows us to build the label name of a label
-# that is not passed into the current context.
-# The module_label is the key element that is passed in.
-# This value provides the root location of the labels
-# See https://bazel.build/external/extension#repository_names_and_visibility
-def _repo_mapped_label(module_label, extension_name, apparent):
- """Construct a canonical repo label accounting for repo mapping.
-
- Args:
- module_label: Label object of the module hosting the extension; see
- "_module" implicit attribute.
- extension_name: str, name of the extension that created the repo in `apparent`.
- apparent: str, a repo-qualified target string, but without the "@". e.g.
- "python38_x86_linux//:python". The repo name should use the apparent
- name used by the extension named by `ext_name` (i.e. the value of the
- `name` arg the extension passes to repository rules)
- """
- return Label("@@{module}~{extension_name}~{apparent}".format(
- module = module_label.workspace_name,
- extension_name = extension_name,
- apparent = apparent,
- ))
-
-# We are doing some bazel stuff here that could use an explanation.
-# The basis of this function is that we need to create a symlink to
-# the python binary that exists in a different repo that we know is
-# setup by rules_python.
-#
-# We are building a Label like
-# @@rules_python~override~python~python3_x86_64-unknown-linux-gnu//:python
-# and then the function creates a symlink named python to that Label.
-# The tricky part is the "~override~" part can't be known in advance
-# and will change depending on how and what version of rules_python
-# is used. To figure that part out, an implicit attribute is used to
-# resolve the module's current name (see "_module" attribute)
-#
-# We are building the Label name dynamically, and can do this even
-# though the Label is not passed into this function. If we choose
-# not do this a user would have to write another 16 lines
-# of configuration code, but we are able to save them that work
-# because we know how rules_python works internally. We are using
-# functions from private:toolchains_repo.bzl which is where the repo
-# is being built. The repo name differs between host OS and platforms
-# and the functions from toolchains_repo gives us this functions that
-# information.
-def _host_hub_impl(repo_ctx):
- # Intentionally empty; this is only intended to be used by repository
- # rules, which don't process build file contents.
- repo_ctx.file("BUILD.bazel", "")
-
- # The two get_ functions we use are also utilized when building
- # the repositories for the different interpreters.
- (os, arch) = get_host_os_arch(repo_ctx)
- host_platform = "{}_{}//:python".format(
- repo_ctx.attr.user_repo_prefix,
- get_host_platform(os, arch),
- )
-
- # the attribute is set to attr.label(default = "//:_"), which
- # provides us the resolved, canonical, prefix for the module's repos.
- # The extension_name "python" is determined by the
- # name bound to the module_extension() call.
- # We then have the OS and platform specific name of the python
- # interpreter.
- label = _repo_mapped_label(repo_ctx.attr._module, "python", host_platform)
-
- # create the symlink in order to set the interpreter for pip.
- repo_ctx.symlink(label, "python")
-
-# We use this rule to set the pip interpreter target when using different operating
-# systems with the same project
-_host_hub = repository_rule(
- implementation = _host_hub_impl,
- local = True,
- attrs = {
- "user_repo_prefix": attr.string(
- mandatory = True,
- doc = """\
-The prefix to create the repository name. Usually the name you used when you created the
-Python toolchain.
-""",
- ),
- "_module": attr.label(default = "//:_"),
- },
-)
diff --git a/python/interpreter_extension.bzl b/python/interpreter_extension.bzl
new file mode 100644
index 0000000..b9afe1a
--- /dev/null
+++ b/python/interpreter_extension.bzl
@@ -0,0 +1,75 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"Module extension that finds the current toolchain Python binary and creates a symlink to it."
+
+load("@pythons_hub//:interpreters.bzl", "INTERPRETER_LABELS")
+
+def _interpreter_impl(mctx):
+ for mod in mctx.modules:
+ for install_attr in mod.tags.install:
+ _interpreter_repo(
+ name = install_attr.name,
+ python_name = install_attr.python_name,
+ )
+
+interpreter = module_extension(
+ doc = """\
+This extension is used to expose the underlying platform-specific
+interpreter registered as a toolchain. It is used by users to get
+a label to the interpreter for use with pip.parse
+in the MODULES.bazel file.
+""",
+ implementation = _interpreter_impl,
+ tag_classes = {
+ "install": tag_class(
+ attrs = {
+ "name": attr.string(
+ doc = "Name of the interpreter, we use this name to set the interpreter for pip.parse",
+ mandatory = True,
+ ),
+ "python_name": attr.string(
+ doc = "The name set in the previous python.toolchain call.",
+ mandatory = True,
+ ),
+ },
+ ),
+ },
+)
+
+def _interpreter_repo_impl(rctx):
+ rctx.file("BUILD.bazel", "")
+
+ actual_interpreter_label = INTERPRETER_LABELS.get(rctx.attr.python_name)
+ if actual_interpreter_label == None:
+ fail("Unable to find interpreter with name {}".format(rctx.attr.python_name))
+
+ rctx.symlink(actual_interpreter_label, "python")
+
+_interpreter_repo = repository_rule(
+ doc = """\
+Load the INTERPRETER_LABELS map. This map contain of all of the Python binaries
+by name and a label the points to the interpreter binary. The
+binaries are downloaded as part of the python toolchain setup.
+The rule finds the label and creates a symlink named "python" to that
+label. This symlink is then used by pip.
+""",
+ implementation = _interpreter_repo_impl,
+ attrs = {
+ "python_name": attr.string(
+ mandatory = True,
+ doc = "Name of the Python toolchain",
+ ),
+ },
+)
diff --git a/python/private/interpreter_hub.bzl b/python/private/interpreter_hub.bzl
new file mode 100644
index 0000000..f1ca670
--- /dev/null
+++ b/python/private/interpreter_hub.bzl
@@ -0,0 +1,58 @@
+# Copyright 2023 The Bazel Authors. All rights reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"Repo rule used by bzlmod extension to create a repo that has a map of Python interpreters and their labels"
+
+load("//python:versions.bzl", "WINDOWS_NAME")
+load("//python/private:toolchains_repo.bzl", "get_host_os_arch", "get_host_platform")
+
+_build_file_for_hub_template = """
+INTERPRETER_LABELS = {{
+{lines}
+}}
+"""
+
+_line_for_hub_template = """\
+ "{name}": Label("@{name}_{platform}//:{path}"),
+"""
+
+def _hub_repo_impl(rctx):
+ (os, arch) = get_host_os_arch(rctx)
+ platform = get_host_platform(os, arch)
+
+ rctx.file("BUILD.bazel", "")
+ is_windows = (os == WINDOWS_NAME)
+ path = "python.exe" if is_windows else "bin/python3"
+
+ lines = "\n".join([_line_for_hub_template.format(
+ name = name,
+ platform = platform,
+ path = path,
+ ) for name in rctx.attr.toolchains])
+
+ rctx.file("interpreters.bzl", _build_file_for_hub_template.format(lines = lines))
+
+hub_repo = repository_rule(
+ doc = """\
+This private rule create a repo with a BUILD file that contains a map of interpreter names
+and the labels to said interpreters. This map is used to by the interpreter hub extension.
+""",
+ implementation = _hub_repo_impl,
+ attrs = {
+ "toolchains": attr.string_list(
+ doc = "List of the base names the toolchain repo defines.",
+ mandatory = True,
+ ),
+ },
+)