From 1cd5c78250702fca1fd87e96db83a64b69038788 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Sun, 23 Feb 2020 22:07:04 +0800 Subject: [PATCH 1/6] add pipeline for paiK8s --- test/config_test.py | 2 +- test/generate_ts_config.py | 19 ++++++++++- test/pipelines-it-pai.yml | 4 +-- test/pipelines-it-paiYarn.yml | 59 +++++++++++++++++++++++++++++++++++ test/training_service.yml | 18 ++++++++++- 5 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 test/pipelines-it-paiYarn.yml diff --git a/test/config_test.py b/test/config_test.py index 91136a8a95..bce0778d1a 100644 --- a/test/config_test.py +++ b/test/config_test.py @@ -112,7 +112,7 @@ def run(args): parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, default=None) parser.add_argument("--exclude", type=str, default=None) - parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local') + parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'paiYarn', 'kubeflow', 'frameworkcontroller'], default='local') parser.add_argument("--local_gpu", action='store_true') parser.add_argument("--preinstall", action='store_true') args = parser.parse_args() diff --git a/test/generate_ts_config.py b/test/generate_ts_config.py index fb5784d3b1..ff3bd5cfc7 100644 --- a/test/generate_ts_config.py +++ b/test/generate_ts_config.py @@ -12,7 +12,7 @@ def update_training_service_config(args): config = get_yml_content(TRAINING_SERVICE_FILE) if args.nni_manager_ip is not None: config[args.ts]['nniManagerIp'] = args.nni_manager_ip - if args.ts == 'pai': + if args.ts == 'paiYarn': if args.pai_user is not None: config[args.ts]['paiYarnConfig']['userName'] = args.pai_user if args.pai_pwd is not None: @@ -27,6 +27,23 @@ def update_training_service_config(args): config[args.ts]['trial']['outputDir'] = args.output_dir if args.vc is not None: config[args.ts]['trial']['virtualCluster'] = args.vc + if args.ts == 'pai': + if args.pai_user is not None: + config[args.ts]['paiConfig']['userName'] = args.pai_user + if args.pai_host is not None: + config[args.ts]['paiConfig']['host'] = args.pai_host + if args.pai_token is not None: + config[args.ts]['paiConfig']['token'] = args.pai_token + if args.nni_docker_image is not None: + config[args.ts]['trial']['image'] = args.nni_docker_image + if args.nniManagerNFSMountPath is not None: + config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nniManagerNFSMountPath + if args.containerNFSMountPath is not None: + config[args.ts]['trial']['containerNFSMountPath'] = args.containerNFSMountPath + if args.paiStoragePlugin is not None: + config[args.ts]['trial']['paiStoragePlugin'] = args.paiStoragePlugin + if args.vc is not None: + config[args.ts]['trial']['virtualCluster'] = args.vc elif args.ts == 'kubeflow': if args.nfs_server is not None: config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server diff --git a/test/pipelines-it-pai.yml b/test/pipelines-it-pai.yml index 15f24b591d..c5ed99f184 100644 --- a/test/pipelines-it-pai.yml +++ b/test/pipelines-it-pai.yml @@ -51,8 +51,8 @@ jobs: echo "TEST_IMG:$TEST_IMG" cd test - python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) \ - --nni_docker_image $TEST_IMG --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) + python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --paiStoragePlugin $(paiStoragePlugin)\ + --pai_token $(pai_token) --nniManagerNFSMountPath $(nniManagerNFSMountPath) --containerNFSMountPath $(containerNFSMountPath) --nni_manager_ip $(nni_manager_ip) PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai PATH=$HOME/.local/bin:$PATH python3 metrics_test.py diff --git a/test/pipelines-it-paiYarn.yml b/test/pipelines-it-paiYarn.yml new file mode 100644 index 0000000000..ad5ec3b305 --- /dev/null +++ b/test/pipelines-it-paiYarn.yml @@ -0,0 +1,59 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +jobs: +- job: 'integration_test_paiYarn' + timeoutInMinutes: 0 + + steps: + - script: python3 -m pip install --upgrade pip setuptools --user + displayName: 'Install python tools' + + - script: | + cd deployment/pypi + echo 'building prerelease package...' + make build + ls $(Build.SourcesDirectory)/deployment/pypi/dist/ + condition: eq( variables['build_docker_img'], 'true' ) + displayName: 'build nni bdsit_wheel' + + - script: | + source install.sh + displayName: 'Install nni toolkit via source code' + + - script: | + sudo apt-get install swig -y + PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC + PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB + displayName: 'Install dependencies for integration tests in PAI mode' + + - script: | + set -e + if [ $(build_docker_img) = 'true' ] + then + cd deployment/pypi + docker login -u $(docker_hub_user) -p $(docker_hub_pwd) + echo 'updating docker file for installing nni from local...' + # update Dockerfile to install NNI in docker image from whl file built in last step + sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile + cat ../docker/Dockerfile + export IMG_TAG=`date -u +%y%m%d%H%M` + + echo 'build and upload docker image' + docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG . + docker push $(test_docker_img_name):$IMG_TAG + + export TEST_IMG=$(test_docker_img_name):$IMG_TAG + cd ../../ + else + export TEST_IMG=$(existing_docker_img) + fi + + echo "TEST_IMG:$TEST_IMG" + cd test + python3 generate_ts_config.py --ts paiYarn --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) \ + --nni_docker_image $TEST_IMG --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) + + PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts paiYarn + PATH=$HOME/.local/bin:$PATH python3 metrics_test.py + displayName: 'integration test' diff --git a/test/training_service.yml b/test/training_service.yml index 2a00acca54..0c5be4df8e 100644 --- a/test/training_service.yml +++ b/test/training_service.yml @@ -52,7 +52,7 @@ frameworkcontroller: local: trainingServicePlatform: local -pai: +paiYarn: nniManagerIp: maxExecDuration: 15m paiYarnConfig: @@ -68,6 +68,22 @@ pai: memoryMB: 8192 outputDir: virtualCluster: +pai: + nniManagerIp: + maxExecDuration: 15m + paiConfig: + host: + host: + userName: + trainingServicePlatform: pai + trial: + gpuNum: 1 + cpuNum: 1 + image: + memoryMB: 8192 + nniManagerNFSMountPath: + containerNFSMountPath: + paiStoragePlugin: remote: machineList: - ip: From 2c4f8c50cf0e1484ba370ab67d5b1d42efdf3b52 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Sun, 23 Feb 2020 22:38:56 +0800 Subject: [PATCH 2/6] fix pipeline --- test/generate_ts_config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/generate_ts_config.py b/test/generate_ts_config.py index ff3bd5cfc7..2824ac53ce 100644 --- a/test/generate_ts_config.py +++ b/test/generate_ts_config.py @@ -111,6 +111,10 @@ def convert_command(): parser.add_argument("--data_dir", type=str) parser.add_argument("--output_dir", type=str) parser.add_argument("--vc", type=str) + parser.add_argument("--pai_token", type=str) + parser.add_argument("--paiStoragePlugin", type=str) + parser.add_argument("--nniManagerNFSMountPath", type=str) + parser.add_argument("--containerNFSMountPath", type=str) # args for kubeflow and frameworkController parser.add_argument("--nfs_server", type=str) parser.add_argument("--nfs_path", type=str) From 0881058226bd0d6e6f8a056d666665dbda749ae5 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Sun, 23 Feb 2020 23:05:37 +0800 Subject: [PATCH 3/6] fix token --- test/training_service.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/training_service.yml b/test/training_service.yml index 0c5be4df8e..040342da67 100644 --- a/test/training_service.yml +++ b/test/training_service.yml @@ -72,7 +72,6 @@ pai: nniManagerIp: maxExecDuration: 15m paiConfig: - host: host: userName: trainingServicePlatform: pai From dad062f49f031d6ccca135cd5f057e0770e022f9 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Tue, 25 Feb 2020 22:20:36 +0800 Subject: [PATCH 4/6] exclude multiphase --- test/pipelines-it-pai.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pipelines-it-pai.yml b/test/pipelines-it-pai.yml index c5ed99f184..70c276ce7e 100644 --- a/test/pipelines-it-pai.yml +++ b/test/pipelines-it-pai.yml @@ -54,6 +54,6 @@ jobs: python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --paiStoragePlugin $(paiStoragePlugin)\ --pai_token $(pai_token) --nniManagerNFSMountPath $(nniManagerNFSMountPath) --containerNFSMountPath $(containerNFSMountPath) --nni_manager_ip $(nni_manager_ip) - PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai + PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai --exclude multi_phase PATH=$HOME/.local/bin:$PATH python3 metrics_test.py displayName: 'integration test' From e12bf395018d013f8e1627bef317d9cff5c64b64 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Thu, 27 Feb 2020 17:58:08 +0800 Subject: [PATCH 5/6] fix comments --- test/generate_ts_config.py | 12 ++++++------ test/pipelines-it-pai.yml | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/generate_ts_config.py b/test/generate_ts_config.py index 2824ac53ce..efa24ee4e2 100644 --- a/test/generate_ts_config.py +++ b/test/generate_ts_config.py @@ -37,11 +37,11 @@ def update_training_service_config(args): if args.nni_docker_image is not None: config[args.ts]['trial']['image'] = args.nni_docker_image if args.nniManagerNFSMountPath is not None: - config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nniManagerNFSMountPath + config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nni_manager_nfs_mount_path if args.containerNFSMountPath is not None: - config[args.ts]['trial']['containerNFSMountPath'] = args.containerNFSMountPath + config[args.ts]['trial']['containerNFSMountPath'] = args.container_nfs_mount_path if args.paiStoragePlugin is not None: - config[args.ts]['trial']['paiStoragePlugin'] = args.paiStoragePlugin + config[args.ts]['trial']['paiStoragePlugin'] = args.pai_storage_plugin if args.vc is not None: config[args.ts]['trial']['virtualCluster'] = args.vc elif args.ts == 'kubeflow': @@ -112,9 +112,9 @@ def convert_command(): parser.add_argument("--output_dir", type=str) parser.add_argument("--vc", type=str) parser.add_argument("--pai_token", type=str) - parser.add_argument("--paiStoragePlugin", type=str) - parser.add_argument("--nniManagerNFSMountPath", type=str) - parser.add_argument("--containerNFSMountPath", type=str) + parser.add_argument("--pai_storage_plugin", type=str) + parser.add_argument("--nni_manager_nfs_mount_path", type=str) + parser.add_argument("--container_nfs_mount_path", type=str) # args for kubeflow and frameworkController parser.add_argument("--nfs_server", type=str) parser.add_argument("--nfs_path", type=str) diff --git a/test/pipelines-it-pai.yml b/test/pipelines-it-pai.yml index 70c276ce7e..d0d04afc07 100644 --- a/test/pipelines-it-pai.yml +++ b/test/pipelines-it-pai.yml @@ -51,8 +51,8 @@ jobs: echo "TEST_IMG:$TEST_IMG" cd test - python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --paiStoragePlugin $(paiStoragePlugin)\ - --pai_token $(pai_token) --nniManagerNFSMountPath $(nniManagerNFSMountPath) --containerNFSMountPath $(containerNFSMountPath) --nni_manager_ip $(nni_manager_ip) + python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\ + --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip) PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai --exclude multi_phase PATH=$HOME/.local/bin:$PATH python3 metrics_test.py From 5dee410d8da42235907b253d16b577f493635bb4 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Thu, 5 Mar 2020 14:52:11 +0800 Subject: [PATCH 6/6] fix pipeline --- test/generate_ts_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/generate_ts_config.py b/test/generate_ts_config.py index efa24ee4e2..dbebdc45b6 100644 --- a/test/generate_ts_config.py +++ b/test/generate_ts_config.py @@ -36,11 +36,11 @@ def update_training_service_config(args): config[args.ts]['paiConfig']['token'] = args.pai_token if args.nni_docker_image is not None: config[args.ts]['trial']['image'] = args.nni_docker_image - if args.nniManagerNFSMountPath is not None: + if args.nni_manager_nfs_mount_path is not None: config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nni_manager_nfs_mount_path - if args.containerNFSMountPath is not None: + if args.container_nfs_mount_path is not None: config[args.ts]['trial']['containerNFSMountPath'] = args.container_nfs_mount_path - if args.paiStoragePlugin is not None: + if args.pai_storage_plugin is not None: config[args.ts]['trial']['paiStoragePlugin'] = args.pai_storage_plugin if args.vc is not None: config[args.ts]['trial']['virtualCluster'] = args.vc