Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Support aml pipeline #3477

Merged
merged 38 commits into from
May 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
bf41180
add pipeline for adl
SparkSnail Dec 25, 2020
c9f1fa1
fix agent pool
SparkSnail Dec 25, 2020
5c8655f
fix build
SparkSnail Dec 26, 2020
3aec741
fix build
SparkSnail Dec 26, 2020
a34cb20
fix build
SparkSnail Dec 26, 2020
650f9b0
fix build
SparkSnail Dec 26, 2020
a2965e7
fix build
SparkSnail Dec 26, 2020
bf5a0bc
fix sed command
SparkSnail Dec 26, 2020
8dbfb61
fix build
SparkSnail Dec 26, 2020
41d8844
fix docker image
SparkSnail Dec 26, 2020
5acad1d
fix docker
SparkSnail Dec 26, 2020
57c9be2
fix test cases in adl
SparkSnail Dec 27, 2020
a00fdc3
fix str
SparkSnail Dec 27, 2020
7a230ff
fix searchspace path
SparkSnail Dec 27, 2020
291507d
fix build
SparkSnail Dec 27, 2020
100e2ef
fix config file path
SparkSnail Dec 27, 2020
df81878
fix accessor test
SparkSnail Dec 27, 2020
ccc018e
add waittime
SparkSnail Dec 27, 2020
63c915b
add main_adl test case
SparkSnail Dec 27, 2020
33c15a2
fix build
SparkSnail Dec 27, 2020
09e95b7
fix build
SparkSnail Dec 27, 2020
29d2b61
Merge branch 'v2.0' of https://github.com/microsoft/nni into dev-pipe…
SparkSnail Dec 28, 2020
32b3242
fix comments
SparkSnail Dec 30, 2020
bdeadb7
fix conflict
SparkSnail Mar 25, 2021
36049d6
support aml pipeline
SparkSnail Mar 25, 2021
3c02c82
fix build
SparkSnail Apr 2, 2021
42793cd
fix build
SparkSnail Apr 2, 2021
09338e5
fix build
SparkSnail Apr 2, 2021
0c5a4ba
fix build
SparkSnail Apr 2, 2021
fde4fcc
fix build
SparkSnail Apr 2, 2021
1b41ed6
fix build
SparkSnail Apr 2, 2021
9acfe4e
fix build
SparkSnail Apr 2, 2021
9b15f30
fix build
SparkSnail Apr 2, 2021
cacac59
fix build
SparkSnail Apr 2, 2021
615eda2
fix build
SparkSnail Apr 2, 2021
79f3b6e
fix build
SparkSnail Apr 2, 2021
9d27257
fix build
SparkSnail Apr 2, 2021
fb9eb8d
Merge branch 'master' of https://github.com/microsoft/nni into dev-pi…
SparkSnail May 11, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions pipelines/integration-test-aml.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Integration test pipeline for the "aml" training service.
# Push and pull-request triggers are disabled; the pipeline runs only on the
# cron schedule below, against the master branch.
trigger: none
pr: none
schedules:
- cron: 0 16 * * *
  branches:
    include: [ master ]

jobs:
- job: aml
  pool: NNI CI REMOTE CLI
  timeoutInMinutes: 120

  steps:
  # Derive a timestamp-based release number and publish it (plus the user-level
  # bin directory on PATH) to the following steps via logging commands.
  - script: |
      set -e
      export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S)
      echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
      echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"

      echo "Working directory: ${PWD}"
      echo "NNI version: ${NNI_RELEASE}"
      echo "Build docker image: $(build_docker_image)"

      python3 -m pip install --upgrade pip setuptools
    displayName: Prepare

  # Build the wheel for the release number set in the Prepare step and install
  # it with the SMAC and BOHB extras.
  - script: |
      set -e
      python3 setup.py build_ts
      python3 setup.py bdist_wheel -p manylinux1_x86_64
      python3 -m pip install dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl[SMAC,BOHB]
    displayName: Build and install NNI

  - script: |
      set -e
      cd examples/tuners/customized_tuner
      python3 setup.py develop --user
      nnictl algo register --meta meta_file.yml
    displayName: Install customized tuner

  # Runs only when the pipeline variable build_docker_image is 'true'.
  # The Docker Hub password is mapped into the step's environment and piped to
  # `docker login --password-stdin` instead of being passed with `-p`, so it is
  # not exposed on the command line.
  - script: |
      set -e
      printf '%s' "${DOCKER_HUB_PASSWORD}" | docker login -u nnidev --password-stdin
      echo '## Build docker image ##'
      docker build --build-arg NNI_RELEASE=${NNI_RELEASE} -t nnidev/nni-nightly .
      echo '## Upload docker image ##'
      docker push nnidev/nni-nightly
    condition: eq(variables['build_docker_image'], 'true')
    env:
      DOCKER_HUB_PASSWORD: $(docker_hub_password)
    displayName: Build and upload docker image

  # Write the AML settings from pipeline variables into the training service
  # config, then run the integration test suite against the aml service.
  - script: |
      set -e
      cd test
      python3 nni_test/nnitest/generate_ts_config.py \
          --ts aml \
          --subscription_id $(subscriptionId) \
          --resource_group $(resourceGroup) \
          --workspace_name $(workspaceName) \
          --compute_target $(computeTarget) \
          --nni_manager_ip $(manager_ip) \
          --nni_docker_image nnidev/nni-nightly

      python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts aml
    displayName: Integration test
16 changes: 15 additions & 1 deletion test/config/training_service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,18 @@ adl:
storageClass:
storageSize:
trainingServicePlatform: adl

# Settings for the "aml" training service used by the integration tests.
# The blank trial image and amlConfig values are filled in by
# generate_ts_config.py from its command-line arguments.
aml:
nniManagerIp:
maxExecDuration: 15m
# Cap the number of trial jobs submitted to AML in one test run
maxTrialNum: 2
trialConcurrency: 2
trainingServicePlatform: aml
trial:
gpuNum: 1
image:
amlConfig:
subscriptionId:
resourceGroup:
workspaceName:
computeTarget:
18 changes: 17 additions & 1 deletion test/nni_test/nnitest/generate_ts_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,24 @@ def update_training_service_config(args):
config[args.ts]['trial']['nfs']['server'] = args.adl_nfs_server
config[args.ts]['trial']['nfs']['path'] = args.adl_nfs_path
config[args.ts]['trial']['nfs']['container_mount_path'] = args.nadl_fs_container_mount_path
elif args.ts == 'aml':
if args.nni_docker_image is not None:
config[args.ts]['trial']['image'] = args.nni_docker_image
if args.subscription_id is not None:
config[args.ts]['amlConfig']['subscriptionId'] = args.subscription_id
if args.resource_group is not None:
config[args.ts]['amlConfig']['resourceGroup'] = args.resource_group
if args.workspace_name is not None:
config[args.ts]['amlConfig']['workspaceName'] = args.workspace_name
if args.compute_target is not None:
config[args.ts]['amlConfig']['computeTarget'] = args.compute_target

dump_yml_content(TRAINING_SERVICE_FILE, config)


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote', 'local', 'frameworkcontroller', 'adl'], default='pai')
parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote', 'local', 'frameworkcontroller', 'adl', 'aml'], default='pai')
parser.add_argument("--nni_docker_image", type=str)
parser.add_argument("--nni_manager_ip", type=str)
# args for PAI
Expand Down Expand Up @@ -129,6 +140,11 @@ def update_training_service_config(args):
parser.add_argument("--adl_nfs_server", type=str)
parser.add_argument("--adl_nfs_path", type=str)
parser.add_argument("--adl_nfs_container_mount_path", type=str)
# args for aml
parser.add_argument("--subscription_id", type=str)
parser.add_argument("--resource_group", type=str)
parser.add_argument("--workspace_name", type=str)
parser.add_argument("--compute_target", type=str)
args = parser.parse_args()

update_training_service_config(args)
2 changes: 1 addition & 1 deletion test/nni_test/nnitest/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def run(args):
parser.add_argument("--cases", type=str, default=None)
parser.add_argument("--exclude", type=str, default=None)
parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai',
'kubeflow', 'frameworkcontroller', 'adl'], default='local')
'kubeflow', 'frameworkcontroller', 'adl', 'aml'], default='local')
args = parser.parse_args()

run(args)
2 changes: 1 addition & 1 deletion ts/nni_manager/core/nniDataStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class NNIDataStore implements DataStore {

public storeTrialJobEvent(
event: TrialJobEvent, trialJobId: string, hyperParameter?: string, jobDetail?: TrialJobDetail): Promise<void> {
this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`);
//this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`);

// Use the timestamp in jobDetail as TrialJobEvent timestamp for different events
let timestamp: number | undefined;
Expand Down