fix: correctly handle absolute URLs in parse_simpleapi_html.bzl (#2112)
This PR addresses a typo and improves the handling of absolute URLs in the
`parse_simpleapi_html.bzl` file, and corrects a minor issue in the
`simpleapi_download.bzl` file.
Summary:
1. parse_simpleapi_html.bzl:
- Introduced a new private function `_get_root_directory(url)` to extract
the root of a given URL, i.e. its scheme and host (`scheme://host`). It fails
with "Invalid URL format" if the URL contains no `://`.
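- Enhanced the `_absolute_url` function to correctly handle absolute URLs by
utilizing the `_get_root_directory` function. This ensures that URLs
starting with a "/" are resolved against the scheme and host of the index
URL (for example, `/whl/foo.whl` found on
`https://download.pytorch.org/whl/cpu/torch` becomes
`https://download.pytorch.org/whl/foo.whl`), avoiding incorrect concatenation.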
2. simpleapi_download.bzl: Corrected the handling of the `real_url` variable
in the `_read_simpleapi` function, ensuring that the correct URL is passed to
`_read_index_result`, both for blocking downloads and when using non-blocking
downloads.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index edf01f7..c87e76e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@
* `3.12 -> 3.12.4`
### Fixed
+* (rules) correctly handle absolute URLs in parse_simpleapi_html.bzl.
* (rules) Fixes build targets linking against `@rules_python//python/cc:current_py_cc_libs`
in host platform builds on macOS, by editing the `LC_ID_DYLIB` field of the hermetic interpreter's
`libpython3.x.dylib` using `install_name_tool`, setting it to its absolute path under Bazel's
diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl
index 2488469..81ee385 100644
--- a/python/private/pypi/parse_simpleapi_html.bzl
+++ b/python/private/pypi/parse_simpleapi_html.bzl
@@ -96,7 +96,25 @@
whls = whls,
)
+def _get_root_directory(url):
+ scheme_end = url.find("://")
+ if scheme_end == -1:
+ fail("Invalid URL format")
+
+ scheme = url[:scheme_end]
+ host_end = url.find("/", scheme_end + 3)
+ if host_end == -1:
+ host_end = len(url)
+ host = url[scheme_end + 3:host_end]
+
+ return "{}://{}".format(scheme, host)
+
def _absolute_url(index_url, candidate):
+ if candidate.startswith("/"):
+ # absolute url
+ root_directory = _get_root_directory(index_url)
+ return "{}{}".format(root_directory, candidate)
+
if not candidate.startswith(".."):
return candidate
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index b258fef..c730c20 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -185,10 +185,10 @@
if download_kwargs.get("block") == False:
# Simulate the same API as ctx.download has
return struct(
- wait = lambda: _read_index_result(ctx, download.wait(), output, url, cache, cache_key),
+ wait = lambda: _read_index_result(ctx, download.wait(), output, real_url, cache, cache_key),
)
- return _read_index_result(ctx, download, output, url, cache, cache_key)
+ return _read_index_result(ctx, download, output, real_url, cache, cache_key)
def _read_index_result(ctx, result, output, url, cache, cache_key):
if not result.success:
diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
index a532e87..aa735b8 100644
--- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
+++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl
@@ -221,6 +221,40 @@
yanked = False,
),
),
+ (
+ struct(
+ attrs = [
+ 'href="/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl#sha256=deadbeef"',
+ ],
+ filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ url = "https://download.pytorch.org/whl/cpu/torch",
+ ),
+ struct(
+ filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ metadata_sha256 = "",
+ metadata_url = "",
+ sha256 = "deadbeef",
+ url = "https://download.pytorch.org/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ yanked = False,
+ ),
+ ),
+ (
+ struct(
+ attrs = [
+ 'href="/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl#sha256=notdeadbeef"',
+ ],
+ filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ url = "http://download.pytorch.org/whl/cpu/torch",
+ ),
+ struct(
+ filename = "torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ metadata_sha256 = "",
+ metadata_url = "",
+ sha256 = "notdeadbeef",
+ url = "http://download.pytorch.org/whl/torch-2.0.0-cp38-cp38-manylinux2014_aarch64.whl",
+ yanked = False,
+ ),
+ ),
]
for (input, want) in tests: