Allow disabling binary build jobs on CI (#100754)
Given the recent outage w.r.t. binary workflows running on CI, I want to close the gap between them and regular CI jobs. The first part is to add the same filter step used by regular CI jobs so that oncalls can disable the job if needed.
* Nightly runs are excluded as they include the step to publish nightly binaries. Allowing oncalls to disable this part requires more thought, so this covers only CI binary build and test jobs.
* As binary jobs don't have a concept of a test matrix config, which is a required parameter to the filter script, I use a pseudo input with the test config `default` there.
### Testing
* https://github.com/pytorch/pytorch/issues/100758. The job is skipped in https://github.com/pytorch/pytorch/actions/runs/4911034089/jobs/8768782689
* https://github.com/pytorch/pytorch/issues/100759. The job is skipped in https://github.com/pytorch/pytorch/actions/runs/4911033966/jobs/8768713669
Note that Windows binary jobs are not run in PRs anymore after https://github.com/pytorch/pytorch/pull/100638, and macOS binary jobs only run nightly. So there are only Linux jobs left.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/100754
Approved by: https://github.com/ZainRizvi
diff --git a/.github/actions/filter-test-configs/action.yml b/.github/actions/filter-test-configs/action.yml
index 4c60731..7bc7ad0 100644
--- a/.github/actions/filter-test-configs/action.yml
+++ b/.github/actions/filter-test-configs/action.yml
@@ -42,9 +42,14 @@
python3 -m pip install requests==2.26.0 pyyaml==6.0
- name: Parse ref
- shell: bash
id: parse-ref
- run: .github/scripts/parse_ref.py
+ shell: bash
+ run: |
+ set -x
+
+ # Use relative path here as this could be checked out anywhere, not necessarily
+ # in runner workspace
+ python3 "${GITHUB_ACTION_PATH}/../../scripts/parse_ref.py"
- name: Get the job name
id: get-job-name
@@ -80,14 +85,17 @@
echo "Workflow: ${GITHUB_WORKFLOW}"
echo "Job name: ${JOB_NAME}"
- .github/scripts/filter_test_configs.py \
+ # Use relative path here as this could be checked out anywhere, not necessarily
+ # in runner workspace
+ python3 "${GITHUB_ACTION_PATH}/../../scripts/filter_test_configs.py" \
--workflow "${GITHUB_WORKFLOW}" \
--job-name "${JOB_NAME}" \
--test-matrix "${{ inputs.test-matrix }}" \
--pr-number "${{ github.event.pull_request.number }}" \
--tag "${{ steps.parse-ref.outputs.tag }}" \
--event-name "${{ github.event_name }}" \
- --schedule "${{ github.event.schedule }}"
+ --schedule "${{ github.event.schedule }}" \
+ --branch "${{ github.event.workflow_run.head_branch }}"
- name: Print the filtered test matrix
shell: bash
diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py
index 8ab6540..91ff039 100755
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@@ -54,6 +54,7 @@
TEST_JOB_NAME = "test"
BUILD_AND_TEST_JOB_NAME = "build-and-test"
JOB_NAME_CFG_REGEX = re.compile(r"(?P<job>[\w-]+)\s+\((?P<cfg>[\w-]+)\)")
+EXCLUDED_BRANCHES = ["nightly"]
def parse_args() -> Any:
@@ -81,7 +82,15 @@
help="name of the event that triggered the job (pull, schedule, etc)",
)
parser.add_argument(
- "--schedule", type=str, help="cron schedule that triggered the job"
+ "--schedule",
+ type=str,
+ help="cron schedule that triggered the job",
+ )
+ parser.add_argument(
+ "--branch",
+ type=str,
+ default="main",
+ help="the branch name",
)
return parser.parse_args()
@@ -214,8 +223,24 @@
disabled_job_cfg,
) = record
- if disabled_workflow != workflow or disabled_platform != current_platform:
- # The current workflow or platform is not disabled by this record
+ if disabled_workflow != workflow:
+ # The current workflow is not disabled by this record
+ continue
+
+ cleanup_regex = rf"(-{BUILD_JOB_NAME}|-{TEST_JOB_NAME})$"
+ # There is an exception here for binary build workflows in which the platform
+ # names have the build and test suffix. For example, we have a build job called
+ # manywheel-py3-cuda11_8-build / build and its subsequent test job called
+ # manywheel-py3-cuda11_8-test / test. So they are linked, but their suffixes
+ # are different
+ disabled_platform_no_suffix = re.sub(cleanup_regex, "", disabled_platform)
+ current_platform_no_suffix = re.sub(cleanup_regex, "", current_platform)
+
+ if (
+ disabled_platform != current_platform
+ and disabled_platform_no_suffix != current_platform_no_suffix
+ ):
+ # The current platform is not disabled by this record
continue
# The logic after this is fairly complicated:
@@ -351,7 +376,7 @@
# periodically scheduled jobs, only the ones at this time
filtered_test_matrix = set_periodic_modes(filtered_test_matrix)
- if args.workflow and args.job_name:
+ if args.workflow and args.job_name and args.branch not in EXCLUDED_BRANCHES:
# If both workflow and job name are available, we will check if the current job
# is disabled and remove it and all its dependants from the test matrix
filtered_test_matrix = remove_disabled_jobs(
diff --git a/.github/scripts/test_filter_test_configs.py b/.github/scripts/test_filter_test_configs.py
index f0fc7ba..a99bf7f 100755
--- a/.github/scripts/test_filter_test_configs.py
+++ b/.github/scripts/test_filter_test_configs.py
@@ -82,6 +82,22 @@
"mock-platform-8",
"build (dynamo)",
],
+ "linux-binary-libtorch-cxx11-abi / libtorch-cpu-shared-with-deps-cxx11-abi-test / test": [
+ "pytorchbot",
+ "9",
+ "https://github.com/pytorch/pytorch/issues/9",
+ "linux-binary-libtorch-cxx11-abi",
+ "libtorch-cpu-shared-with-deps-cxx11-abi-test",
+ "test",
+ ],
+ "linux-binary-manywheel / manywheel-py3_8-cuda11_8-build": [
+ "pytorchbot",
+ "10",
+ "https://github.com/pytorch/pytorch/issues/10",
+ "linux-binary-manywheel",
+ "manywheel-py3_8-cuda11_8-build",
+ "",
+ ],
}
MOCKED_LABELS = [{"name": "foo"}, {"name": "bar"}, {}, {"name": ""}]
@@ -255,6 +271,34 @@
"expected": '{"include": [{"config": "default"}, {"config": "backward_compat"}]}',
"description": "not disabled on this platform",
},
+ {
+ "workflow": "linux-binary-libtorch-cxx11-abi",
+ "job_name": "libtorch-cpu-shared-with-deps-cxx11-abi-build / build",
+ "test_matrix": '{include: [{config: "default"}]}',
+ "expected": '{"include": []}',
+ "description": "Build job is not needed when test job has been disabled",
+ },
+ {
+ "workflow": "linux-binary-libtorch-cxx11-abi",
+ "job_name": "libtorch-cpu-shared-with-deps-cxx11-abi-test / test",
+ "test_matrix": '{include: [{config: "default"}]}',
+ "expected": '{"include": []}',
+ "description": "The binary test job is disabled on this platform",
+ },
+ {
+ "workflow": "linux-binary-manywheel",
+ "job_name": "manywheel-py3_8-cuda11_8-build / build",
+ "test_matrix": '{include: [{config: "default"}]}',
+ "expected": '{"include": []}',
+ "description": "Both binary build and test jobs are disabled",
+ },
+ {
+ "workflow": "linux-binary-manywheel",
+ "job_name": "manywheel-py3_8-cuda11_8-test / test",
+ "test_matrix": '{include: [{config: "default"}]}',
+ "expected": '{"include": []}',
+ "description": "Both binary build and test jobs are disabled",
+ },
]
for case in testcases:
diff --git a/.github/templates/common.yml.j2 b/.github/templates/common.yml.j2
index 84c3e27..64ab2c5 100644
--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@@ -33,7 +33,6 @@
echo "system info $(uname -a)"
{%- endmacro -%}
-
{%- macro setup_ec2_windows() -%}
!{{ display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
@@ -55,6 +54,21 @@
Set-MpPreference -DisableRealtimeMonitoring $True
{%- endmacro -%}
+{%- macro apply_filter() -%}
+ - name: Check if the job is disabled
+ id: filter
+ # Binary workflows checkout to pytorch subdirectory instead
+ uses: ./pytorch/.github/actions/filter-test-configs
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ # NB: Use a mock test matrix with a default value here. After filtering, if the
+ # returned matrix is empty, it means that the job is disabled
+ test-matrix: |
+ { include: [
+ { config: "default" },
+ ]}
+{%- endmacro -%}
+
{%- macro checkout(submodules="recursive", deep_clone=True, directory="", repository="pytorch/pytorch", branch="", checkout_pr_head=True) -%}
- name: Checkout !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }}
uses: malfet/checkout@silent-checkout
diff --git a/.github/workflows/_binary-build-linux.yml b/.github/workflows/_binary-build-linux.yml
index 232807f..4a13af6 100644
--- a/.github/workflows/_binary-build-linux.yml
+++ b/.github/workflows/_binary-build-linux.yml
@@ -123,19 +123,25 @@
echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
echo "SHA1=${{ env.SHA1 }}"
} >> "${GITHUB_ENV} }}"
+
- name: List the env
shell: bash
run: env
+
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:
github-secret: ${{ secrets.github-token }}
+
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+
- name: Setup Linux
uses: ./.github/actions/setup-linux
+
- name: Chown workspace
uses: ./.github/actions/chown-workspace
+
- name: Clean workspace
shell: bash
run: |
@@ -149,6 +155,7 @@
submodules: recursive
path: pytorch
quiet-checkout: true
+
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
@@ -163,17 +170,33 @@
repository: pytorch/builder
path: builder
quiet-checkout: true
+
- name: Clean pytorch/builder checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: builder
+ - name: Check if the job is disabled
+ id: filter
+ uses: ./pytorch/.github/actions/filter-test-configs
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ # NB: Use a mock test matrix with a default value here. After filtering, if the
+ # returned matrix is empty, it means that the job is disabled
+ test-matrix: |
+ { include: [
+ { config: "default" },
+ ]}
+
- name: Pull Docker image
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}
+
- name: Build PyTorch binary
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
run: |
set -x
mkdir -p artifacts/
@@ -203,14 +226,16 @@
)
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/${{ inputs.PACKAGE_TYPE }}/build.sh"
+
- name: Chown artifacts
- if: always()
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
shell: bash
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- uses: actions/upload-artifact@v3
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
with:
name: ${{ inputs.build_name }}
if-no-files-found: error
diff --git a/.github/workflows/_binary-test-linux.yml b/.github/workflows/_binary-test-linux.yml
index 820008c..8d0fc9b 100644
--- a/.github/workflows/_binary-test-linux.yml
+++ b/.github/workflows/_binary-test-linux.yml
@@ -126,13 +126,17 @@
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:
github-secret: ${{ secrets.github-token }}
+
# Setup the environment
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+
- name: Setup Linux
uses: ./.github/actions/setup-linux
+
- name: Chown workspace
uses: ./.github/actions/chown-workspace
+
- name: Clean workspace
shell: bash
run: |
@@ -145,6 +149,7 @@
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
+
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
@@ -158,28 +163,44 @@
submodules: recursive
repository: pytorch/builder
path: builder
+
- name: Clean pytorch/builder checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: builder
- - uses: actions/download-artifact@v3
- name: Download Build Artifacts
+ - name: Check if the job is disabled
+ id: filter
+ uses: ./pytorch/.github/actions/filter-test-configs
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ # NB: Use a mock test matrix with a default value here. After filtering, if the
+ # returned matrix is empty, it means that the job is disabled
+ test-matrix: |
+ { include: [
+ { config: "default" },
+ ]}
+
+ - name: Download Build Artifacts
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
+ uses: actions/download-artifact@v3
with:
name: ${{ inputs.build_name }}
path: "${{ runner.temp }}/artifacts/"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
- if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' }}
+ if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
- name: Pull Docker image
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}
- name: Test Pytorch binary
+ if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown Linux
diff --git a/.github/workflows/_binary-upload.yml b/.github/workflows/_binary-upload.yml
index f129491..23435bd 100644
--- a/.github/workflows/_binary-upload.yml
+++ b/.github/workflows/_binary-upload.yml
@@ -101,6 +101,10 @@
no-sudo: true
- name: Download Build Artifacts
+ id: download-artifacts
+ # NB: When the previous build job is skipped, there won't be any artifacts and
+ # this step will fail. Binary build jobs can only be skipped on CI, not nightly
+ continue-on-error: true
uses: actions/download-artifact@v3
with:
name: ${{ inputs.build_name }}
@@ -110,6 +114,7 @@
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
run: |
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
+
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }}
shell: bash -e -l {0}
@@ -118,7 +123,9 @@
if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
fi
+
- name: Upload binaries
+ if: steps.download-artifacts.outcome && steps.download-artifacts.outcome == 'success'
env:
PKG_DIR: "${{ runner.temp }}/artifacts"
UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"