-
Notifications
You must be signed in to change notification settings - Fork 90
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
infra: Rename buildspec files. (#211)
- Loading branch information
Showing
8 changed files
with
131 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# CodeBuild buildspec that only guards the docker/ directory: it reports the pull request
# number and then runs a failing command when changes match "docker/".
version: 0.2 | ||
|
||
phases: | ||
pre_build: | ||
commands: | ||
# Extract the run(s) of digits from CODEBUILD_SOURCE_VERSION; PR_NUM stays empty when there are none.
- PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+') | ||
- echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.' | ||
|
||
build: | ||
commands: | ||
|
||
# error_cmd prints a pointer to the deep-learning-containers repository and exits with status 1.
- error_cmd="echo 'In order to make changes to the docker files, please, use https://github.com/aws/deep-learning-containers repository.' && exit 1" | ||
# NOTE(review): execute-command-if-has-matching-changes is defined outside this file; presumably
# it runs the command only when the change set touches a path matching "docker/" - confirm.
- execute-command-if-has-matching-changes "$error_cmd" "docker/" |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,94 @@ | ||
version: 0.2 | ||
|
||
# Build-wide settings referenced by the commands in the phases below.
env: | ||
variables: | ||
# Passed to pytest as --framework-version and used as the prefix of every image tag.
FRAMEWORK_VERSION: '1.4.0' | ||
CPU_INSTANCE_TYPE: 'ml.c4.xlarge' | ||
# The 'ml.' prefix is stripped from this value before it is passed to launch-ec2-instance.
GPU_INSTANCE_TYPE: 'ml.p2.8xlarge' | ||
ECR_REPO: 'sagemaker-test' | ||
GITHUB_REPO: 'sagemaker-pytorch-container' | ||
# Registry id used for the 'aws ecr get-login' that precedes the DLC GPU image build.
DLC_ACCOUNT: '763104351884' | ||
# SETUP_CMDS is written to SETUP_FILE with printf, which turns each \n into a newline.
SETUP_FILE: 'setup_cmds.sh' | ||
SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]' | ||
|
||
phases: | ||
# pre_build derives the shell variables (ACCOUNT, PREPROD_IMAGE, PR_NUM, BUILD_ID) used by the build phase.
pre_build: | ||
commands: | ||
# NOTE(review): start-dockerd is defined outside this file; presumably it starts the Docker daemon - confirm.
- start-dockerd | ||
# Account id of the caller, looked up through the regional STS endpoint.
- ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text) | ||
# ECR repository URI in the caller's account; image tags are appended to it later.
- PREPROD_IMAGE="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO" | ||
# Extract the run(s) of digits from CODEBUILD_SOURCE_VERSION; PR_NUM stays empty when there are none.
- PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+') | ||
# CODEBUILD_BUILD_ID with every ':' replaced by '-'; the result becomes part of the image tags.
- BUILD_ID="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')" | ||
- echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.' | ||
|
||
build: | ||
commands: | ||
- TOX_PARALLEL_NO_SPINNER=1 | ||
- PY_COLORS=0 | ||
|
||
# install | ||
- pip3 install -U -e .[test] | ||
|
||
# run linters | ||
- tox -e flake8,twine | ||
|
||
# run unit tests | ||
- tox -e py27,py36,py37 test/unit | ||
|
||
# define tags | ||
- GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID" | ||
- DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID" | ||
- DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID" | ||
|
||
# run local CPU integration tests (build and push the image to ECR repo) | ||
- test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
- test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
|
||
# launch remote GPU instance | ||
- prefix='ml.' | ||
- instance_type=${GPU_INSTANCE_TYPE#"$prefix"} | ||
- create-key-pair | ||
- launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest | ||
|
||
# build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test | ||
- python3 setup.py sdist | ||
- build_dir="test/container/$FRAMEWORK_VERSION" | ||
- $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) | ||
- docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION . | ||
# push DLC GPU image to ECR | ||
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) | ||
- docker push $PREPROD_IMAGE:$DLC_GPU_TAG | ||
|
||
# run GPU local integration tests | ||
- printf "$SETUP_CMDS" > $SETUP_FILE | ||
# no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests | ||
- generic_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" | ||
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
- dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" | ||
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
|
||
# run CPU sagemaker integration tests | ||
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
|
||
# run GPU sagemaker integration tests | ||
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG" | ||
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" | ||
|
||
finally: | ||
# shut down remote GPU instance | ||
- cleanup-gpu-instances | ||
- cleanup-key-pairs | ||
|
||
- error_cmd="echo 'In order to make changes to the docker files, please, use https://github.com/aws/deep-learning-containers repository.' && exit 1" | ||
- execute-command-if-has-matching-changes "$error_cmd" "docker/" | ||
# remove ECR image | ||
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG | ||
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG | ||
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#include <stdio.h> | ||
#include <string.h> | ||
|
||
/* | ||
* Modifies gethostname to return algo-1, algo-2, etc. when running on SageMaker. | ||
* | ||
* Without this gethostname() on SageMaker returns 'aws', leading NCCL/MPI to think there is only one host, | ||
* not realizing that it needs to use NET/Socket. | ||
* | ||
* When docker container starts we read 'current_host' value from /opt/ml/input/config/resourceconfig.json | ||
* and replace PLACEHOLDER_HOSTNAME with it before compiling this code into a shared library. | ||
*/ | ||
int gethostname(char *name, size_t len) | ||
{ | ||
const char *val = PLACEHOLDER_HOSTNAME; | ||
strncpy(name, val, len); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/usr/bin/env bash
#
# Container entry point.
#
# When the first argument is "train", build a shared library that overrides gethostname()
# with this host's 'current_host' value and run `train` with that library preloaded.
# Any other invocation is evaluated as a shell command line.

if [[ "$1" = "train" ]]; then
    # jq is called without -r, so the value keeps its JSON double quotes; the substitution
    # below therefore produces a valid C string literal.
    CURRENT_HOST=$(jq .current_host /opt/ml/input/config/resourceconfig.json)

    # "-i -e" instead of "-ie": GNU sed parses "-ie" as -i with the backup suffix "e",
    # which leaves a stray changehostname.ce file behind.
    sed -i -e "s/PLACEHOLDER_HOSTNAME/$CURRENT_HOST/g" changehostname.c

    gcc -o changehostname.o -c -fPIC -Wall changehostname.c
    gcc -o libchangehostname.so -shared -export-dynamic changehostname.o -ldl

    # NOTE(review): the library is built in the current directory but preloaded from "/";
    # this presumably relies on the working directory being "/" - confirm.
    LD_PRELOAD=/libchangehostname.so train
else
    eval "$@"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters