"""Upload test artifacts from a GitHub Actions workflow run to S3."""

import argparse
import os
import re
from tempfile import TemporaryDirectory

from tools.stats.upload_stats_lib import download_gha_artifacts, upload_file_to_s3

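# Artifact groups to download from GHA and mirror to S3.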
ARTIFACTS = [
    "sccache-stats",
    "test-jsons",
    "test-reports",
    "usage-log",
]
BUCKET_NAME = "gha-artifacts"
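# GHA inserts a "-runattempt<N>" fragment into artifact names (see the comment
# in get_artifacts below); this pattern matches that fragment so it can be
# stripped before uploading.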
FILENAME_REGEX = r"-runattempt\d+"


def get_artifacts(repo: str, workflow_run_id: int, workflow_run_attempt: int) -> None:
    with TemporaryDirectory() as temp_dir:
        print("Using temporary directory:", temp_dir)
        os.chdir(temp_dir)

        for artifact in ARTIFACTS:
            artifact_paths = download_gha_artifacts(
                artifact, workflow_run_id, workflow_run_attempt
            )

            for artifact_path in artifact_paths:
                # A GHA artifact is named as follows: NAME-runattempt${{ github.run_attempt }}-SUFFIX.zip,
                # and we want to remove the run attempt to conform with the naming convention on S3, i.e.
                # pytorch/pytorch/WORKFLOW_ID/RUN_ATTEMPT/artifact/NAME-SUFFIX.zip
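                # For example (hypothetical artifact name), the substitution below
                # maps "test-reports-runattempt2-linux.zip" to "test-reports-linux.zip".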
                s3_filename = re.sub(FILENAME_REGEX, "", artifact_path.name)
                upload_file_to_s3(
                    file_name=str(artifact_path.resolve()),
                    bucket=BUCKET_NAME,
                    key=f"{repo}/{workflow_run_id}/{workflow_run_attempt}/artifact/{s3_filename}",
                )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Upload test artifacts from GHA to S3")
    parser.add_argument(
        "--workflow-run-id",
        type=int,
        required=True,
        help="ID of the workflow run to get artifacts from",
    )
    parser.add_argument(
        "--workflow-run-attempt",
        type=int,
        required=True,
        help="which attempt of the workflow run this is",
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="which GitHub repo this workflow run belongs to",
    )
    args = parser.parse_args()
    get_artifacts(args.repo, args.workflow_run_id, args.workflow_run_attempt)
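
# Example invocation (hypothetical IDs; the script path is an assumption):
#   python tools/stats/upload_artifacts.py \
#       --repo pytorch/pytorch --workflow-run-id 123456789 --workflow-run-attempt 1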