forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
181 lines (177 loc) · 8.56 KB
/
generated-docker-builds.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/docker_builds_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: docker-builds
on:
workflow_dispatch:
pull_request:
types: [opened, synchronize, reopened]
paths:
- '.circleci/docker/**'
- '.github/workflows/generated-docker-builds.yml'
schedule:
- cron: 1 3 * * 3
concurrency:
group: docker-builds-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
AWS_DEFAULT_REGION: us-east-1
jobs:
docker-build:
runs-on: linux.2xlarge
timeout-minutes: 240
strategy:
matrix:
include:
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7'
docker_image_short_name: 'pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7'
docker_image_short_name: 'pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.7-clang9'
docker_image_short_name: 'pytorch-linux-bionic-py3.7-clang9'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-rocm4.3.1-py3.7'
docker_image_short_name: 'pytorch-linux-bionic-rocm4.3.1-py3.7'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-rocm4.5-py3.7'
docker_image_short_name: 'pytorch-linux-bionic-rocm4.5-py3.7'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7'
docker_image_short_name: 'pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7'
docker_image_short_name: 'pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c'
docker_image_short_name: 'pytorch-linux-xenial-py3-clang5-android-ndk-r19c'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan'
docker_image_short_name: 'pytorch-linux-xenial-py3-clang5-asan'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-asan'
docker_image_short_name: 'pytorch-linux-xenial-py3-clang7-asan'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-onnx'
docker_image_short_name: 'pytorch-linux-xenial-py3-clang7-onnx'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4'
docker_image_short_name: 'pytorch-linux-xenial-py3.7-gcc5.4'
- docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc7'
docker_image_short_name: 'pytorch-linux-xenial-py3.7-gcc7'
env:
DOCKER_IMAGE_BASE: '${{ matrix.docker_image_base }}'
name: docker-build (${{ matrix.docker_image_short_name }})
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: Start docker if docker deamon is not running
run: |
if systemctl is-active --quiet docker; then
echo "Docker daemon is running...";
else
echo "Starting docker deamon..." && sudo systemctl start docker;
fi
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_SKIP_S3_UPLOAD: 1
working-directory: .circleci/docker
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
./build_docker.sh
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Parse ref
shell: bash
id: parse-ref
run: ./.github/scripts/parse_ref.py
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af