blob: 21d0500f484effbd882a87e125eeff931ed2a885 [file] [log] [blame]
name: bazel
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
docker-image-name:
required: true
type: string
description: Name of the base docker image to build with.
cuda-version:
required: true
type: string
description: What CUDA version to build with (i.e. "11.7"), "cpu" for none.
sync-tag:
required: false
type: string
default: ""
description: |
If this is set, our linter will use this to make sure that every other
job with the same `sync-tag` is identical.
test-matrix:
required: true
type: string
description: |
A JSON description of what configs to run later on.
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs:
filter:
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }}
keep-going: ${{ steps.filter.outputs.keep-going }}
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
fetch-depth: 1
submodules: false
- name: Select all requested test configurations
id: filter
uses: ./.github/actions/filter-test-configs
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
test-matrix: ${{ inputs.test-matrix }}
build-and-test:
needs: filter
# Don't run on forked repos.
if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False'
strategy:
matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ inputs.cuda-version != 'cpu' }}
- name: Output disk space left
run: |
sudo df -H
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Build
env:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
# TODO duplicated
AWS_DEFAULT_REGION: us-east-1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
TORCH_CUDA_ARCH_LIST: 5.2
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
CUDA_VERSION: ${{ inputs.cuda-version }}
run: |
export SHARD_NUMBER=0
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Make sure we copy test results from bazel-testlogs symlink to
# a regular directory ./test/test-reports
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e GITHUB_ACTIONS \
-e GITHUB_REPOSITORY \
-e GITHUB_WORKFLOW \
-e GITHUB_JOB \
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e GIT_DEFAULT_BRANCH="$GIT_DEFAULT_BRANCH" \
-e SHARD_NUMBER \
-e NUM_TEST_SHARDS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e REENABLED_ISSUES \
-e TORCH_CUDA_ARCH_LIST \
-e OUR_GITHUB_JOB_ID \
-e CUDA_VERSION \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="1g" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
echo "container_id=${container_name}" >> "${GITHUB_ENV}"
- name: Test
id: test
# Time out the test phase after 3.5 hours
timeout-minutes: 120
run: |
docker exec -t "${container_id}" sh -c '.ci/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion
run: |
cat test/**/*.log || true
- name: Chown workspace
uses: ./.github/actions/chown-workspace
if: always()
- name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
with:
file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()