# periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7 #2554
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
# Workflow display name; the same string is reused below as BUILD_ENVIRONMENT
# and as the prefix of the concurrency group.
name: periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7
# Triggers: ciflow tag pushes, a periodic schedule, and manual dispatch.
on:
  push:
    tags:
      - 'ciflow/all/*'
      - 'ciflow/cuda/*'
      - 'ciflow/libtorch/*'
      - 'ciflow/linux/*'
      - 'ciflow/scheduled/*'
  schedule:
    # Minute 45 of hours 4, 10, 16 and 22, every day.
    # Quoted so the expression is always read as a single string.
    - cron: '45 4,10,16,22 * * *'
  workflow_dispatch:
# Workflow-level environment, visible to every step of every job.
env:
  BUILD_ENVIRONMENT: periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7
  # Image repository without a tag; the build job appends a tag derived from
  # the git tree hash of .circleci/docker.
  DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7
  SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
  XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
  # Quoted so the value stays the literal string "5.2" rather than a YAML float.
  TORCH_CUDA_ARCH_LIST: '5.2'
  IN_CI: 1
  IS_GHA: 1
  # This is used for the phase of adding wheel tests only, will be removed once completed
  IN_WHEEL_TEST: 1
  # Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
  CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  # JSON array of the pull request's label names (taken from the event payload).
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  AWS_DEFAULT_REGION: us-east-1
  PR_NUMBER: ${{ github.event.pull_request.number }}
  # Pull request head commit when the event carries one, otherwise the triggering commit.
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  PYTORCH_RETRY_TEST_CASES: 1
# One group per pull request number (or per commit SHA when the event has no
# pull request), with manually dispatched runs kept in a group of their own.
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
jobs:
  # Single job: prepare the runner, resolve (and if necessary build) the CI
  # docker image, run .jenkins/pytorch/build.sh inside it, upload binary size
  # statistics, then tear the runner state down.
  build:
    runs-on: linux.2xlarge
    timeout-minutes: 240
    env:
      JOB_BASE_NAME: periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7-build
    outputs:
      # Fully qualified image reference produced by the calculate-tag step.
      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
    steps:
      - name: print labels
        run: echo "${PR_LABELS}"

      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
          echo "system info $(uname -a)"

      - name: Start docker if docker deamon is not running
        run: |
          if systemctl is-active --quiet docker; then
            echo "Docker daemon is running...";
          else
            echo "Starting docker deamon..." && sudo systemctl start docker;
          fi

      - name: Log in to ECR
        env:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
          # Ask the CLI for the bare account id instead of scraping it out of
          # the JSON response with grep/cut, which depends on the output format.
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          # Run the given command up to three times, sleeping 1s and then 2s
          # between attempts.
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
            --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"

      - name: Chown workspace
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${ALPINE_IMAGE}"
          # Ensure the working directory gets chowned back to the current user
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

      - name: Clean workspace
        run: |
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"

      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Preserve github env variables for use in docker
        run: |
          # Written to a per-run file that the Build step passes to
          # `docker run --env-file`.
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"

      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: recursive

      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd

      - name: Calculate docker image tag
        id: calculate-tag
        run: |
          # The tag is the git tree hash of .circleci/docker at HEAD.
          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
          # Step outputs go through the GITHUB_OUTPUT file; the `::set-output`
          # workflow command is deprecated.
          echo "docker_tag=${DOCKER_TAG}" >> "${GITHUB_OUTPUT}"
          echo "docker_image=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_OUTPUT}"

      - name: Check if image should be built
        id: check
        env:
          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
        run: |
          set -x
          # Check if image already exists, if it does then skip building it
          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
            exit 0
          fi
          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
            # if we're on the base branch then use the parent commit
            MERGE_BASE=$(git rev-parse HEAD~)
          else
            # otherwise we're on a PR, so use the most recent base commit
            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
          fi
          # Covers the case where a previous tag doesn't exist for the tree
          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
            exit 1
          fi
          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
          # If no image exists but the hash is the same as the previous hash then we should error out here
          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
            echo "       contact the PyTorch team to restore the original images"
            exit 1
          fi
          # Reached only when the image is missing and the docker directory
          # changed relative to the merge base: request a rebuild.
          echo "rebuild=yes" >> "${GITHUB_OUTPUT}"

      - name: Build and push docker image
        # Runs only when the check step set its `rebuild` output.
        if: ${{ steps.check.outputs.rebuild }}
        env:
          DOCKER_SKIP_S3_UPLOAD: 1
        working-directory: .circleci/docker
        run: |
          # Strip the registry/namespace prefix, leaving the bare image name.
          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
          ./build_docker.sh

      - name: Pull Docker image
        run: |
          retry () {
            "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
          }
          retry docker pull "${DOCKER_IMAGE}"

      - name: Parse ref
        shell: bash
        id: parse-ref
        run: ./.github/scripts/parse_ref.py

      - name: Build
        env:
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        run: |
          # detached container should get cleaned up by teardown_ec2_linux
          # `docker run --detach` prints the container id, which is captured
          # here and used by the `docker exec` below.
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e JOB_BASE_NAME \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e AWS_DEFAULT_REGION \
            -e IS_GHA \
            -e PR_NUMBER \
            -e SHA1 \
            -e BRANCH \
            -e GITHUB_RUN_ID \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e TORCH_CUDA_ARCH_LIST \
            -e PR_LABELS \
            -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" \
            -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" \
            -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'

      - name: Display and upload binary build size statistics (Click Me)
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: '${{ github.run_id }}'
        run: |
          # Fall back to 0 when the commit timestamp cannot be read.
          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
          export COMMIT_TIME
          pip3 install requests==2.26 boto3==1.16.34
          # Best effort: a failed upload must not fail the job.
          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0

      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh

      # NOTE(review): same command as the "Chown workspace" step two steps
      # above, but this one also runs after a failure; confirm both are needed.
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

      - name: Kill containers, clean up images
        if: always()
        run: |
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
          # Prune all of the docker images
          docker system prune -af

      # NOTE(review): second occurrence of the hold step in this teardown
      # sequence; confirm against the generating template whether it is intended.
      - name: Hold runner for 2 hours or until ssh sessions have drained
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh

      - name: Clean up docker images
        if: always()
        run: |
          # Prune all of the docker images
          docker system prune -af