feat(bzlmod): Cleaning up interpreter resolution (#1218)

This commit cleans up the use of "canonical resolution" of
the Python interpreter. When the python extension processes its
toolchain tags, it collects a list of the interpreter (toolchain)
names and then uses the hub_repo rule to create a map of those
names to the interpreter labels.
 
Next, we use the interpreter extension, which creates
repos that have symlinks pointing to the different interpreter
binaries.

The user can then pass in a label to the pip call for the
 specific hermetic interpreter.
diff --git a/MODULE.bazel b/MODULE.bazel
index 92da402..e490beb 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -46,3 +46,6 @@
     "pypi__coverage_cp39_x86_64-apple-darwin",
     "pypi__coverage_cp39_x86_64-unknown-linux-gnu",
 )
+
+python = use_extension("@rules_python//python:extensions.bzl", "python")
+use_repo(python, "pythons_hub")
diff --git a/examples/bzlmod_build_file_generation/MODULE.bazel b/examples/bzlmod_build_file_generation/MODULE.bazel
index 781b0cb..d59fbb3 100644
--- a/examples/bzlmod_build_file_generation/MODULE.bazel
+++ b/examples/bzlmod_build_file_generation/MODULE.bazel
@@ -48,9 +48,6 @@
 # We also use the same name for python.host_python_interpreter.
 PYTHON_NAME = "python3"
 
-# This is the name that is used for the host interpreter
-PYTHON_INTERPRETER = PYTHON_NAME + "_host_interpreter"
-
 # We next initialize the python toolchain using the extension.
 # You can set different Python versions in this block.
 python.toolchain(
@@ -66,37 +63,46 @@
 # into the scope of the current module.
 # All of the python3 repositories use the PYTHON_NAME as there prefix.  They
 # are not catenated for ease of reading.
-use_repo(python, PYTHON_NAME)
-use_repo(python, "python3_toolchains")
-use_repo(python, PYTHON_INTERPRETER)
+use_repo(python, PYTHON_NAME, "python3_toolchains")
 
-# Register an already-defined toolchain so that Bazel can use it during toolchain resolution.
+# Register an already-defined toolchain so that Bazel can use it during
+# toolchain resolution.
 register_toolchains(
     "@python3_toolchains//:all",
 )
 
-# Use the pip extension
-pip = use_extension("@rules_python//python:extensions.bzl", "pip")
+# The interpreter extension discovers the platform specific Python binary.
+# It creates a symlink to the binary, and we pass the label to the following
+# pip.parse call.
+interpreter = use_extension("@rules_python//python:interpreter_extension.bzl", "interpreter")
+interpreter.install(
+    name = "interpreter_python3",
+    python_name = PYTHON_NAME,
+)
+use_repo(interpreter, "interpreter_python3")
 
-# Use the extension to call the `pip_repository` rule that invokes `pip`, with `incremental` set.
-# Accepts a locked/compiled requirements file and installs the dependencies listed within.
+# Use the extension, pip.parse, to call the `pip_repository` rule that invokes
+# `pip`, with `incremental` set. The pip call accepts a locked/compiled
+# requirements file and installs the dependencies listed within.
 # Those dependencies become available in a generated `requirements.bzl` file.
 # You can instead check this `requirements.bzl` file into your repo.
 # Because this project has different requirements for windows vs other
 # operating systems, we have requirements for each.
+pip = use_extension("@rules_python//python:extensions.bzl", "pip")
 pip.parse(
     name = "pip",
     # When using gazelle you must use set the following flag
     # in order for the generation of gazelle dependency resolution.
     incompatible_generate_aliases = True,
-    # The interpreter attribute points to the interpreter to use for running
-    # pip commands to download the packages in the requirements file.
+    # The interpreter_target attribute points to the interpreter to
+    # use for running pip commands to download the packages in the
+    # requirements file.
     # As a best practice, we use the same interpreter as the toolchain
     # that was configured above; this ensures the same Python version
     # is used for both resolving dependencies and running tests/binaries.
     # If this isn't specified, then you'll get whatever is locally installed
     # on your system.
-    python_interpreter_target = "@" + PYTHON_INTERPRETER + "//:python",
+    python_interpreter_target = "@interpreter_python3//:python",
     requirements_lock = "//:requirements_lock.txt",
     requirements_windows = "//:requirements_windows.txt",
 )
diff --git a/python/extensions.bzl b/python/extensions.bzl
index 3bcbb50..ce11069 100644
--- a/python/extensions.bzl
+++ b/python/extensions.bzl
@@ -19,9 +19,10 @@
 load("@rules_python//python/pip_install:repositories.bzl", "pip_install_dependencies")
 load("@rules_python//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
 load("@rules_python//python/private:coverage_deps.bzl", "install_coverage_deps")
-load("@rules_python//python/private:toolchains_repo.bzl", "get_host_os_arch", "get_host_platform")
+load("@rules_python//python/private:interpreter_hub.bzl", "hub_repo")
 
 def _python_impl(module_ctx):
+    toolchains = []
     for mod in module_ctx.modules:
         for toolchain_attr in mod.tags.toolchain:
             python_register_toolchains(
@@ -33,11 +34,16 @@
                 register_coverage_tool = toolchain_attr.configure_coverage_tool,
                 ignore_root_user_error = toolchain_attr.ignore_root_user_error,
             )
-            host_hub_name = toolchain_attr.name + "_host_interpreter"
-            _host_hub(
-                name = host_hub_name,
-                user_repo_prefix = toolchain_attr.name,
-            )
+
+            # We collect all of the toolchain names to create
+            # the INTERPRETER_LABELS map.  This is used
+            # by interpreter_extension.bzl
+            toolchains.append(toolchain_attr.name)
+
+    hub_repo(
+        name = "pythons_hub",
+        toolchains = toolchains,
+    )
 
 python = module_extension(
     implementation = _python_impl,
@@ -133,89 +139,3 @@
         "parse": tag_class(attrs = _pip_parse_ext_attrs()),
     },
 )
-
-# This function allows us to build the label name of a label
-# that is not passed into the current context.
-# The module_label is the key element that is passed in.
-# This value provides the root location of the labels
-# See https://bazel.build/external/extension#repository_names_and_visibility
-def _repo_mapped_label(module_label, extension_name, apparent):
-    """Construct a canonical repo label accounting for repo mapping.
-
-    Args:
-        module_label: Label object of the module hosting the extension; see
-          "_module" implicit attribute.
-        extension_name: str, name of the extension that created the repo in `apparent`.
-        apparent: str, a repo-qualified target string, but without the "@". e.g.
-          "python38_x86_linux//:python". The repo name should use the apparent
-          name used by the extension named by `ext_name` (i.e. the value of the
-          `name` arg the extension passes to repository rules)
-    """
-    return Label("@@{module}~{extension_name}~{apparent}".format(
-        module = module_label.workspace_name,
-        extension_name = extension_name,
-        apparent = apparent,
-    ))
-
-# We are doing some bazel stuff here that could use an explanation.
-# The basis of this function is that we need to create a symlink to
-# the python binary that exists in a different repo that we know is
-# setup by rules_python.
-#
-# We are building a Label like
-# @@rules_python~override~python~python3_x86_64-unknown-linux-gnu//:python
-# and then the function creates a symlink named python to that Label.
-# The tricky part is the "~override~" part can't be known in advance
-# and will change depending on how and what version of rules_python
-# is used. To figure that part out, an implicit attribute is used to
-# resolve the module's current name (see "_module" attribute)
-#
-# We are building the Label name dynamically, and can do this even
-# though the Label is not passed into this function.  If we choose
-# not do this a user would have to write another 16 lines
-# of configuration code, but we are able to save them that work
-# because we know how rules_python works internally.  We are using
-# functions from private:toolchains_repo.bzl which is where the repo
-# is being built. The repo name differs between host OS and platforms
-# and the functions from toolchains_repo gives us this functions that
-# information.
-def _host_hub_impl(repo_ctx):
-    # Intentionally empty; this is only intended to be used by repository
-    # rules, which don't process build file contents.
-    repo_ctx.file("BUILD.bazel", "")
-
-    # The two get_ functions we use are also utilized when building
-    # the repositories for the different interpreters.
-    (os, arch) = get_host_os_arch(repo_ctx)
-    host_platform = "{}_{}//:python".format(
-        repo_ctx.attr.user_repo_prefix,
-        get_host_platform(os, arch),
-    )
-
-    # the attribute is set to attr.label(default = "//:_"), which
-    # provides us the resolved, canonical, prefix for the module's repos.
-    # The extension_name "python" is determined by the
-    # name bound to the module_extension() call.
-    # We then have the OS and platform specific name of the python
-    # interpreter.
-    label = _repo_mapped_label(repo_ctx.attr._module, "python", host_platform)
-
-    # create the symlink in order to set the interpreter for pip.
-    repo_ctx.symlink(label, "python")
-
-# We use this rule to set the pip interpreter target when using different operating
-# systems with the same project
-_host_hub = repository_rule(
-    implementation = _host_hub_impl,
-    local = True,
-    attrs = {
-        "user_repo_prefix": attr.string(
-            mandatory = True,
-            doc = """\
-The prefix to create the repository name.  Usually the name you used when you created the 
-Python toolchain.
-""",
-        ),
-        "_module": attr.label(default = "//:_"),
-    },
-)
diff --git a/python/interpreter_extension.bzl b/python/interpreter_extension.bzl
new file mode 100644
index 0000000..b9afe1a
--- /dev/null
+++ b/python/interpreter_extension.bzl
@@ -0,0 +1,75 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"Module extension that finds the current toolchain Python binary and creates a symlink to it."
+
+load("@pythons_hub//:interpreters.bzl", "INTERPRETER_LABELS")
+
+def _interpreter_impl(mctx):
+    for mod in mctx.modules:
+        for install_attr in mod.tags.install:
+            _interpreter_repo(
+                name = install_attr.name,
+                python_name = install_attr.python_name,
+            )
+
+interpreter = module_extension(
+    doc = """\
+This extension is used to expose the underlying platform-specific
+interpreter registered as a toolchain. It is used by users to get
+a label to the interpreter for use with pip.parse
+in the MODULES.bazel file.
+""",
+    implementation = _interpreter_impl,
+    tag_classes = {
+        "install": tag_class(
+            attrs = {
+                "name": attr.string(
+                    doc = "Name of the interpreter, we use this name to set the interpreter for pip.parse",
+                    mandatory = True,
+                ),
+                "python_name": attr.string(
+                    doc = "The name set in the previous python.toolchain call.",
+                    mandatory = True,
+                ),
+            },
+        ),
+    },
+)
+
+def _interpreter_repo_impl(rctx):
+    rctx.file("BUILD.bazel", "")
+
+    actual_interpreter_label = INTERPRETER_LABELS.get(rctx.attr.python_name)
+    if actual_interpreter_label == None:
+        fail("Unable to find interpreter with name {}".format(rctx.attr.python_name))
+
+    rctx.symlink(actual_interpreter_label, "python")
+
+_interpreter_repo = repository_rule(
+    doc = """\
+Load the INTERPRETER_LABELS map. This map contain of all of the Python binaries
+by name and a label the points to the interpreter binary. The
+binaries are downloaded as part of the python toolchain setup.
+The rule finds the label and creates a symlink named "python" to that
+label. This symlink is then used by pip.
+""",
+    implementation = _interpreter_repo_impl,
+    attrs = {
+        "python_name": attr.string(
+            mandatory = True,
+            doc = "Name of the Python toolchain",
+        ),
+    },
+)
diff --git a/python/private/interpreter_hub.bzl b/python/private/interpreter_hub.bzl
new file mode 100644
index 0000000..f1ca670
--- /dev/null
+++ b/python/private/interpreter_hub.bzl
@@ -0,0 +1,58 @@
+# Copyright 2023 The Bazel Authors. All rights reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"Repo rule used by bzlmod extension to create a repo that has a map of Python interpreters and their labels"
+
+load("//python:versions.bzl", "WINDOWS_NAME")
+load("//python/private:toolchains_repo.bzl", "get_host_os_arch", "get_host_platform")
+
+_build_file_for_hub_template = """
+INTERPRETER_LABELS = {{
+{lines}
+}}
+"""
+
+_line_for_hub_template = """\
+    "{name}": Label("@{name}_{platform}//:{path}"),
+"""
+
+def _hub_repo_impl(rctx):
+    (os, arch) = get_host_os_arch(rctx)
+    platform = get_host_platform(os, arch)
+
+    rctx.file("BUILD.bazel", "")
+    is_windows = (os == WINDOWS_NAME)
+    path = "python.exe" if is_windows else "bin/python3"
+
+    lines = "\n".join([_line_for_hub_template.format(
+        name = name,
+        platform = platform,
+        path = path,
+    ) for name in rctx.attr.toolchains])
+
+    rctx.file("interpreters.bzl", _build_file_for_hub_template.format(lines = lines))
+
+hub_repo = repository_rule(
+    doc = """\
+This private rule create a repo with a BUILD file that contains a map of interpreter names
+and the labels to said interpreters. This map is used to by the interpreter hub extension.
+""",
+    implementation = _hub_repo_impl,
+    attrs = {
+        "toolchains": attr.string_list(
+            doc = "List of the base names the toolchain repo defines.",
+            mandatory = True,
+        ),
+    },
+)