blob: b8c6d01bc909cfdd4a68981aa5bb50cd497728ff [file] [log] [blame]
# Copyright 2024 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities for interacting with gs://."""
import dataclasses
import datetime
import logging
import re
import shlex
import subprocess
from typing import List, Optional
# Name of the gsutil executable this module invokes.
# 'gsutil.py' is provided by depot_tools, whereas 'gsutil' is provided by
# either https://cloud.google.com/sdk/docs/install or the 'google-cloud-cli'
# package. Since depot_tools is needed to even use 'repo', 'gsutil.py' is
# guaranteed to exist.
GSUTIL = "gsutil.py"
@dataclasses.dataclass(frozen=True)
class GsEntry:
    """An entry of `gsutil ls -l` output.

    Instances are immutable, since the dataclass is declared frozen.
    """

    # When this was last modified (or created). `None` if the entry is a
    # directory.
    last_modified: Optional[datetime.datetime]
    # The full gs:// path to the artifact.
    gs_path: str
def _datetime_from_gs_time(timestamp_str: str) -> datetime.datetime:
    """Converts a gs timestamp string into a UTC-aware datetime.

    Args:
        timestamp_str: A timestamp of the form `YYYY-MM-DDTHH:MM:SSZ`.

    Returns:
        The parsed time, with its tzinfo set to UTC.

    Raises:
        ValueError: If `timestamp_str` does not match the expected format.
    """
    gs_time_format = "%Y-%m-%dT%H:%M:%SZ"
    # strptime yields a naive datetime here; the trailing "Z" is matched as a
    # literal character, so the UTC zone has to be attached explicitly.
    naive_time = datetime.datetime.strptime(timestamp_str, gs_time_format)
    return naive_time.replace(tzinfo=datetime.timezone.utc)
def _parse_ls_output(stdout: str) -> List[GsEntry]:
    """Parses output of `gsutil ls -l`.

    Args:
        stdout: The text that `gsutil ls -l` wrote to stdout.

    Returns:
        One GsEntry per entry line, in the order the lines appear. Entries
        without a timestamp get `last_modified=None`. The list is empty if
        `stdout` contains no lines.

    Raises:
        ValueError: If a line is not blank, not a header, and not in either
            of the recognized entry forms.
    """
    stdout_lines = stdout.splitlines()
    if not stdout_lines:
        # Nothing was printed at all, so there is no summary line to drop
        # and nothing to parse.
        return []
    # Ignore the last line, since that's always "TOTAL:"
    stdout_lines.pop()

    line_re = re.compile(
        # Entries can take one of two forms:
        r"(?:"
        # 1. The entry has a size, mod date, and name
        r"\d+\s+(\S+T\S+)\s+(gs://.+)"
        r"|"
        # 2. The entry has none of those, and is just a gs URL.
        r"(gs://.+)"
        r")"
    )

    results = []
    for line in stdout_lines:
        # If the line starts with gs://, it's a header for a directory's
        # contents. Skip it. This has to be checked before stripping: a
        # URL-only entry (form 2 above) can only reach the regex if the raw
        # line had leading whitespace in front of its gs:// URL.
        if line.startswith("gs://"):
            continue

        line = line.strip()
        if not line:
            continue

        m = line_re.fullmatch(line)
        if m is None:
            raise ValueError(f"Unexpected line from gs: {line!r}")

        # Exactly one alternative matched, so either the first two groups
        # are set (form 1) or only the third is (form 2).
        timestamp_str, gs_url, alt_gs_url = m.groups()
        if timestamp_str:
            last_modified = _datetime_from_gs_time(timestamp_str)
            gs_path = gs_url
        else:
            last_modified = None
            gs_path = alt_gs_url
        results.append(GsEntry(last_modified=last_modified, gs_path=gs_path))
    return results
def ls(gs_url: str) -> List[GsEntry]:
    """Runs `gsutil ls -l` on the given `gs_url`.

    Globs are forwarded to gs://

    Args:
        gs_url: The gs:// URL (possibly containing globs) to list.

    Returns:
        A list of GsEntrys matching `gs_url`. If the list is empty, no paths
        matched the URL.

    Raises:
        subprocess.CalledProcessError: If gsutil exits with a nonzero status
            for any reason other than the URL matching no objects.
    """
    gsutil_argv = [GSUTIL, "ls", "-l", gs_url]
    completed = subprocess.run(
        gsutil_argv,
        check=False,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
    )

    if not completed.returncode:
        return _parse_ls_output(completed.stdout)

    # gsutil exits with an error after printing this message when nothing
    # could be found; report that as "no entries" rather than as a failure.
    if "One or more URLs matched no objects." in completed.stderr:
        return []

    logging.error(
        "%s failed; stderr:\n%s", shlex.join(gsutil_argv), completed.stderr
    )
    # The return code is nonzero here, so this always raises.
    completed.check_returncode()
    assert False, "unreachable"