From d04441b1b1ee14e3e141d632349593245a150b03 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 17 Jun 2020 16:35:58 +0800 Subject: [PATCH 01/17] Exit when installation failed on pipeline (#2548) --- azure-pipelines.yml | 13 ++++++++++--- test/pipelines/pipelines-it-frameworkcontroller.yml | 2 ++ test/pipelines/pipelines-it-kubeflow.yml | 1 + test/pipelines/pipelines-it-local.yml | 1 + test/pipelines/pipelines-it-pai.yml | 2 ++ test/pipelines/pipelines-it-paiYarn.yml | 2 ++ .../pipelines-it-remote-linux-to-linux.yml | 3 +++ .../pipelines-it-remote-linux-to-windows.yml | 1 + test/scripts/it.sh | 1 + 9 files changed, 23 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index bc269ac148..92080252c4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,6 +8,7 @@ jobs: steps: - script: | + set -e python3 -m pip install --upgrade pip setuptools --user python3 -m pip install pylint==2.3.1 astroid==2.2.5 --user python3 -m pip install coverage --user @@ -25,6 +26,7 @@ jobs: yarn eslint displayName: 'Run eslint' - script: | + set -e python3 -m pip install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html --user python3 -m pip install tensorflow==1.15.2 --user python3 -m pip install keras==2.1.6 --user @@ -44,6 +46,7 @@ jobs: python3 -m pylint --rcfile pylintrc nnicli displayName: 'Run pylint' - script: | + set -e python3 -m pip install flake8 --user EXCLUDES=./src/nni_manager/,./src/webui,./tools/nni_annotation/testcase/,./examples/trials/mnist-nas/*/mnist*.py,./examples/trials/nas_cifar10/src/cifar10/general_child.py python3 -m flake8 . 
--count --exclude=$EXCLUDES --select=E9,F63,F72,F82 --show-source --statistics @@ -67,6 +70,7 @@ jobs: steps: - script: | + set -e python3 -m pip install --upgrade pip setuptools --user python3 -m pip install coverage --user echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}" @@ -75,6 +79,7 @@ jobs: source install.sh displayName: 'Install nni toolkit via source code' - script: | + set -e python3 -m pip install torch==1.3.1+cpu torchvision==0.4.2+cpu -f https://download.pytorch.org/whl/torch_stable.html --user python3 -m pip install tensorflow==1.15.2 --user python3 -m pip install keras==2.1.6 --user @@ -98,18 +103,20 @@ jobs: vmImage: 'macOS-10.15' steps: - - script: python3 -m pip install --upgrade pip setuptools + - script: | + python3 -m pip install --upgrade pip setuptools + echo "##vso[task.setvariable variable=PATH]${HOME}/Library/Python/3.7/bin:${PATH}" displayName: 'Install python tools' - script: | source install.sh - echo "##vso[task.setvariable variable=PATH]${HOME}/Library/Python/3.7/bin:${PATH}" displayName: 'Install nni toolkit via source code' - script: | + set -e # pytorch Mac binary does not support CUDA, default is cpu version python3 -m pip install torchvision==0.6.0 torch==1.5.0 --user python3 -m pip install tensorflow==1.15.2 --user brew install swig@3 - rm /usr/local/bin/swig + rm -f /usr/local/bin/swig ln -s /usr/local/opt/swig\@3/bin/swig /usr/local/bin/swig nnictl package install --name=SMAC displayName: 'Install dependencies' diff --git a/test/pipelines/pipelines-it-frameworkcontroller.yml b/test/pipelines/pipelines-it-frameworkcontroller.yml index dc7849ee1f..b2c0af21c0 100644 --- a/test/pipelines/pipelines-it-frameworkcontroller.yml +++ b/test/pipelines/pipelines-it-frameworkcontroller.yml @@ -10,6 +10,7 @@ jobs: displayName: 'Install python tools' - script: | + set -e cd deployment/pypi if [ -d ./dist ] then @@ -26,6 +27,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | + set -e sudo apt-get 
install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-kubeflow.yml b/test/pipelines/pipelines-it-kubeflow.yml index ec7c4dbca5..db470e7c45 100644 --- a/test/pipelines/pipelines-it-kubeflow.yml +++ b/test/pipelines/pipelines-it-kubeflow.yml @@ -10,6 +10,7 @@ jobs: displayName: 'Install python tools' - script: | + set -e cd deployment/pypi if [ -d ./dist ] then diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index 4606795274..00fb8e11e5 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -9,6 +9,7 @@ jobs: source install.sh displayName: 'Install nni toolkit via source code' - script: | + set -e python3 -m pip install scikit-learn==0.20.0 --user python3 -m pip install torchvision==0.4.1 --user python3 -m pip install torch==1.3.1 --user diff --git a/test/pipelines/pipelines-it-pai.yml b/test/pipelines/pipelines-it-pai.yml index 3bea2c402b..f2b428ecc6 100644 --- a/test/pipelines/pipelines-it-pai.yml +++ b/test/pipelines/pipelines-it-pai.yml @@ -10,6 +10,7 @@ jobs: displayName: 'Install python tools' - script: | + set -e cd deployment/pypi if [ -d ./dist ] then @@ -26,6 +27,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | + set -e sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-paiYarn.yml b/test/pipelines/pipelines-it-paiYarn.yml index ad5ec3b305..373166e3f3 100644 --- a/test/pipelines/pipelines-it-paiYarn.yml +++ b/test/pipelines/pipelines-it-paiYarn.yml @@ -10,6 +10,7 @@ jobs: displayName: 'Install python tools' - script: | + set -e cd deployment/pypi echo 'building prerelease package...' 
make build @@ -22,6 +23,7 @@ jobs: displayName: 'Install nni toolkit via source code' - script: | + set -e sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/pipelines/pipelines-it-remote-linux-to-linux.yml b/test/pipelines/pipelines-it-remote-linux-to-linux.yml index 78b3c9edaa..a79760bcf3 100644 --- a/test/pipelines/pipelines-it-remote-linux-to-linux.yml +++ b/test/pipelines/pipelines-it-remote-linux-to-linux.yml @@ -6,6 +6,7 @@ jobs: - script: python3 -m pip install --upgrade pip setuptools --user displayName: 'Install python tools' - script: | + set -e cd deployment/pypi echo 'building prerelease package...' make build @@ -15,6 +16,7 @@ jobs: source install.sh displayName: 'Install nni toolkit via source code' - script: | + set -e sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB @@ -45,6 +47,7 @@ jobs: inputs: secureFile: $(remote_private_key) - script: | + set -e cp $(Agent.TempDirectory)/$(remote_private_key) test/id_rsa chmod 600 test/id_rsa scp -P $(remote_port) -i test/id_rsa $(remote_user)@$(remote_host):/tmp/nnitest/$(Build.BuildId)/port test/port diff --git a/test/pipelines/pipelines-it-remote-linux-to-windows.yml b/test/pipelines/pipelines-it-remote-linux-to-windows.yml index 613db986fc..fe0652127d 100644 --- a/test/pipelines/pipelines-it-remote-linux-to-windows.yml +++ b/test/pipelines/pipelines-it-remote-linux-to-windows.yml @@ -27,6 +27,7 @@ jobs: - script: make easy-install displayName: "Install nni via source code" - script: | + set -e sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB diff --git a/test/scripts/it.sh b/test/scripts/it.sh index bb6abd6a44..9d6682a9e2 100644 --- a/test/scripts/it.sh +++ b/test/scripts/it.sh 
@@ -1,4 +1,5 @@ #!/bin/bash +set -e CWD=${PWD} ## Export certain environment variables for unittest code to work From 1a6cc4144167d061c4e91b1f5e4ffeee12836f9b Mon Sep 17 00:00:00 2001 From: colorjam Date: Wed, 17 Jun 2020 18:07:31 +0800 Subject: [PATCH 02/17] add taylorfoweight into supported algo (#2551) --- docs/en_US/Compressor/Overview.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en_US/Compressor/Overview.md b/docs/en_US/Compressor/Overview.md index 047d1903c0..757f13a8ce 100644 --- a/docs/en_US/Compressor/Overview.md +++ b/docs/en_US/Compressor/Overview.md @@ -26,6 +26,7 @@ Pruning algorithms compress the original network by removing redundant weights o | [ActivationAPoZRankFilterPruner](./Pruner.md#ActivationAPoZRankFilterPruner) | Pruning filters based on the metric APoZ (average percentage of zeros) which measures the percentage of zeros in activations of (convolutional) layers. [Reference Paper](https://arxiv.org/abs/1607.03250) | | [ActivationMeanRankFilterPruner](./Pruner.md#ActivationMeanRankFilterPruner) | Pruning filters based on the metric that calculates the smallest mean value of output activations | | [Slim Pruner](./Pruner.md#slim-pruner) | Pruning channels in convolution layers by pruning scaling factors in BN layers(Learning Efficient Convolutional Networks through Network Slimming) [Reference Paper](https://arxiv.org/abs/1708.06519) | +| [TaylorFO Pruner](./Pruner.md#taylorfoweightfilterpruner) | Pruning filters based on the first order taylor expansion on weights(Importance Estimation for Neural Network Pruning) [Reference Paper](http://jankautz.com/publications/Importance4NNPruning_CVPR19.pdf) | **Quantization** From 97f9d8a992749584455fa76822956190a75c64f3 Mon Sep 17 00:00:00 2001 From: Lijiaoa <61399850+Lijiaoa@users.noreply.github.com> Date: Wed, 17 Jun 2020 18:25:44 +0800 Subject: [PATCH 03/17] Show original search space (#2570) --- src/webui/src/components/Overview.tsx | 20 +------------------- 1 file changed, 1 
insertion(+), 19 deletions(-) diff --git a/src/webui/src/components/Overview.tsx b/src/webui/src/components/Overview.tsx index 6e39feac3b..a3d3378629 100644 --- a/src/webui/src/components/Overview.tsx +++ b/src/webui/src/components/Overview.tsx @@ -61,7 +61,6 @@ class Overview extends React.Component { render(): React.ReactNode { const { trialConcurrency } = this.state; const { experimentUpdateBroadcast, metricGraphMode, bestTrialEntries } = this.props; - const searchSpace = this.convertSearchSpace(); const bestTrials = this.findBestTrials(); // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const bestAccuracy = bestTrials.length > 0 ? bestTrials[0].accuracy! : NaN; @@ -105,7 +104,7 @@ class Overview extends React.Component { - + {/* */} @@ -163,23 +162,6 @@ class Overview extends React.Component { ); } - private convertSearchSpace(): object { - const searchSpace = Object.assign({}, EXPERIMENT.searchSpace); - Object.keys(searchSpace).map(item => { - const key = searchSpace[item]._type; - const value = searchSpace[item]._value; - switch (key) { - case 'quniform': - case 'qnormal': - case 'qlognormal': - searchSpace[item]._value = [value[0], value[1]]; - break; - default: - } - }); - return searchSpace; - } - private findBestTrials(): Trial[] { const bestTrials = TRIALS.sort(); const { bestTrialEntries } = this.props; From 10e5656078a266260088b1a761f61b5df86a4f19 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Fri, 19 Jun 2020 17:11:37 +0800 Subject: [PATCH 04/17] Add shortcut to merge parameter into base setup (#2540) --- docs/en_US/autotune_ref.md | 6 ++++ examples/trials/mnist-pytorch/mnist.py | 4 +-- src/sdk/pynni/nni/utils.py | 40 ++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/docs/en_US/autotune_ref.md b/docs/en_US/autotune_ref.md index 9c8857b570..dc5a0ecea0 100644 --- a/docs/en_US/autotune_ref.md +++ b/docs/en_US/autotune_ref.md @@ -78,3 +78,9 @@ .. 
autoclass:: nni.bohb_advisor.bohb_advisor.BOHB :members: ``` + +## Utilities + +```eval_rst +.. autofunction:: nni.utils.merge_parameter +``` diff --git a/examples/trials/mnist-pytorch/mnist.py b/examples/trials/mnist-pytorch/mnist.py index ec9641af00..94f371c81f 100644 --- a/examples/trials/mnist-pytorch/mnist.py +++ b/examples/trials/mnist-pytorch/mnist.py @@ -13,6 +13,7 @@ import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +from nni.utils import merge_parameter from torchvision import datasets, transforms logger = logging.getLogger('mnist_AutoML') @@ -157,8 +158,7 @@ def get_params(): # get parameters form tuner tuner_params = nni.get_next_parameter() logger.debug(tuner_params) - params = vars(get_params()) - params.update(tuner_params) + params = vars(merge_parameter(get_params(), tuner_params)) print(params) main(params) except Exception as exception: diff --git a/src/sdk/pynni/nni/utils.py b/src/sdk/pynni/nni/utils.py index 173203ab72..37a3b6e5a9 100644 --- a/src/sdk/pynni/nni/utils.py +++ b/src/sdk/pynni/nni/utils.py @@ -216,3 +216,43 @@ def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeTyp else: y = copy.deepcopy(x) return y + + +def merge_parameter(base_params, override_params): + """ + Update the parameters in ``base_params`` with ``override_params``. + Can be useful to override parsed command line arguments. + + Parameters + ---------- + base_params : namespace or dict + Base parameters. A key-value mapping. + override_params : dict or None + Parameters to override. Usually the parameters got from ``get_next_parameters()``. + When it is none, nothing will happen. + + Returns + ------- + namespace or dict + The updated ``base_params``. Note that ``base_params`` will be updated inplace. The return value is + only for convenience. 
+ """ + if override_params is None: + return base_params + is_dict = isinstance(base_params, dict) + for k, v in override_params.items(): + if is_dict: + if k not in base_params: + raise ValueError('Key \'%s\' not found in base parameters.' % k) + if type(base_params[k]) != type(v) and base_params[k] is not None: + raise TypeError('Expected \'%s\' in override parameters to have type \'%s\', but found \'%s\'.' % + (k, type(base_params[k]), type(v))) + base_params[k] = v + else: + if not hasattr(base_params, k): + raise ValueError('Key \'%s\' not found in base parameters.' % k) + if type(getattr(base_params, k)) != type(v) and getattr(base_params, k) is not None: + raise TypeError('Expected \'%s\' in override parameters to have type \'%s\', but found \'%s\'.' % + (k, type(getattr(base_params, k)), type(v))) + setattr(base_params, k, v) + return base_params From 16fd8a21f250503115197812258490db75148e60 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Fri, 19 Jun 2020 17:14:01 +0800 Subject: [PATCH 05/17] Add support for .nniignore (#2454) --- src/nni_manager/common/utils.ts | 2 +- src/nni_manager/package.json | 3 + .../training_service/common/util.ts | 125 +++++++++------- .../training_service/test/fileUtility.test.ts | 133 ++++++++++++++++++ src/nni_manager/yarn.lock | 91 ++++++++++-- 5 files changed, 293 insertions(+), 61 deletions(-) create mode 100644 src/nni_manager/training_service/test/fileUtility.test.ts diff --git a/src/nni_manager/common/utils.ts b/src/nni_manager/common/utils.ts index 413d2ee220..734c601dfa 100644 --- a/src/nni_manager/common/utils.ts +++ b/src/nni_manager/common/utils.ts @@ -274,7 +274,7 @@ function countFilesRecursively(directory: string): Promise { }); } -function validateFileName(fileName: string): boolean { +export function validateFileName(fileName: string): boolean { const pattern: string = '^[a-z0-9A-Z._-]+$'; const validateResult = fileName.match(pattern); if (validateResult) { diff --git a/src/nni_manager/package.json 
b/src/nni_manager/package.json index 34aa0b0121..9b32372fd4 100644 --- a/src/nni_manager/package.json +++ b/src/nni_manager/package.json @@ -16,6 +16,7 @@ "child-process-promise": "^2.2.1", "express": "^4.16.3", "express-joi-validator": "^2.0.0", + "ignore": "^5.1.4", "js-base64": "^2.4.9", "kubernetes-client": "^6.5.0", "rx": "^4.1.0", @@ -23,6 +24,7 @@ "ssh2": "^0.6.1", "stream-buffers": "^3.0.2", "tail-stream": "^0.3.4", + "tar": "^6.0.2", "tree-kill": "^1.2.0", "ts-deferred": "^1.0.4", "typescript-ioc": "^1.2.4", @@ -42,6 +44,7 @@ "@types/sqlite3": "^3.1.3", "@types/ssh2": "^0.5.35", "@types/stream-buffers": "^3.0.2", + "@types/tar": "^4.0.3", "@types/tmp": "^0.0.33", "@typescript-eslint/eslint-plugin": "^2.10.0", "@typescript-eslint/parser": "^2.10.0", diff --git a/src/nni_manager/training_service/common/util.ts b/src/nni_manager/training_service/common/util.ts index dd7e368ac6..9655e79bf7 100644 --- a/src/nni_manager/training_service/common/util.ts +++ b/src/nni_manager/training_service/common/util.ts @@ -6,12 +6,44 @@ import * as cpp from 'child-process-promise'; import * as cp from 'child_process'; import * as fs from 'fs'; -import * as os from 'os'; +import ignore from 'ignore'; import * as path from 'path'; +import * as tar from 'tar'; import { String } from 'typescript-string-operations'; -import { countFilesRecursively, getNewLine, validateFileNameRecursively } from '../../common/utils'; +import { validateFileName } from '../../common/utils'; import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData'; +/** + * List all files in directory except those ignored by .nniignore. 
+ * @param source + * @param destination + */ +export function* listDirWithIgnoredFiles(root: string, relDir: string, ignoreFiles: string[]): Iterable { + let ignoreFile = undefined; + const source = path.join(root, relDir); + if (fs.existsSync(path.join(source, '.nniignore'))) { + ignoreFile = path.join(source, '.nniignore'); + ignoreFiles.push(ignoreFile); + } + const ig = ignore(); + ignoreFiles.forEach((i) => ig.add(fs.readFileSync(i).toString())); + for (const d of fs.readdirSync(source)) { + const entry = path.join(relDir, d); + if (ig.ignores(entry)) + continue; + const entryStat = fs.statSync(path.join(root, entry)); + if (entryStat.isDirectory()) { + yield entry; + yield* listDirWithIgnoredFiles(root, entry, ignoreFiles); + } + else if (entryStat.isFile()) + yield entry; + } + if (ignoreFile !== undefined) { + ignoreFiles.pop(); + } +} + /** * Validate codeDir, calculate file count recursively under codeDir, and throw error if any rule is broken * @@ -19,28 +51,28 @@ import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData'; * @returns file number under codeDir */ export async function validateCodeDir(codeDir: string): Promise { - let fileCount: number | undefined; + let fileCount: number = 0; + let fileTotalSize: number = 0; let fileNameValid: boolean = true; - try { - fileCount = await countFilesRecursively(codeDir); - } catch (error) { - throw new Error(`Call count file error: ${error}`); - } - try { - fileNameValid = await validateFileNameRecursively(codeDir); - } catch (error) { - throw new Error(`Validate file name error: ${error}`); - } - - if (fileCount !== undefined && fileCount > 1000) { - const errMessage: string = `Too many files(${fileCount} found}) in ${codeDir},` - + ` please check if it's a valid code dir`; - throw new Error(errMessage); - } - - if (!fileNameValid) { - const errMessage: string = `File name in ${codeDir} is not valid, please check file names, only support digit number、alphabet and (.-_) in file name.`; - throw new 
Error(errMessage); + for (const relPath of listDirWithIgnoredFiles(codeDir, '', [])) { + const d = path.join(codeDir, relPath); + fileCount += 1; + fileTotalSize += fs.statSync(d).size; + if (fileCount > 2000) { + throw new Error(`Too many files and directories (${fileCount} already scanned) in ${codeDir},` + + ` please check if it's a valid code dir`); + } + if (fileTotalSize > 300 * 1024 * 1024) { + throw new Error(`File total size too large in code dir (${fileTotalSize} bytes already scanned, exceeds 300MB).`); + } + fileNameValid = true; + relPath.split(path.sep).forEach(fpart => { + if (fpart !== '' && !validateFileName(fpart)) + fileNameValid = false; + }); + if (!fileNameValid) { + throw new Error(`Validate file name error: '${d}' is an invalid file name.`); + } } return fileCount; @@ -68,10 +100,16 @@ export async function execMkdir(directory: string, share: boolean = false): Prom * @param destination */ export async function execCopydir(source: string, destination: string): Promise { - if (process.platform === 'win32') { - await cpp.exec(`powershell.exe Copy-Item "${source}\\*" -Destination "${destination}" -Recurse`); - } else { - await cpp.exec(`cp -r '${source}/.' 
'${destination}'`); + if (!fs.existsSync(destination)) + await fs.promises.mkdir(destination); + for (const relPath of listDirWithIgnoredFiles(source, '', [])) { + const sourcePath = path.join(source, relPath); + const destPath = path.join(destination, relPath); + if (fs.statSync(sourcePath).isDirectory()) { + await fs.promises.mkdir(destPath); + } else { + await fs.promises.copyFile(sourcePath, destPath); + } } return Promise.resolve(); @@ -165,28 +203,19 @@ export function setEnvironmentVariable(variable: { key: string; value: string }) * @param tarPath */ export async function tarAdd(tarPath: string, sourcePath: string): Promise { - if (process.platform === 'win32') { - const tarFilePath: string = tarPath.split('\\') - .join('\\\\'); - const sourceFilePath: string = sourcePath.split('\\') - .join('\\\\'); - const script: string[] = []; - script.push( - `import os`, - `import tarfile`, - String.Format(`tar = tarfile.open("{0}","w:gz")\r\nroot="{1}"\r\nfor file_path,dir,files in os.walk(root):`, tarFilePath, sourceFilePath), - ` for file in files:`, - ` full_path = os.path.join(file_path, file)`, - ` file = os.path.relpath(full_path, root)`, - ` tar.add(full_path, arcname=file)`, - `tar.close()`); - await fs.promises.writeFile(path.join(os.tmpdir(), 'tar.py'), script.join(getNewLine()), { encoding: 'utf8', mode: 0o777 }); - const tarScript: string = path.join(os.tmpdir(), 'tar.py'); - await cpp.exec(`python ${tarScript}`); - } else { - await cpp.exec(`tar -czf ${tarPath} -C ${sourcePath} .`); + const fileList = []; + for (const d of listDirWithIgnoredFiles(sourcePath, '', [])) { + fileList.push(d); } - + tar.create( + { + gzip: true, + file: tarPath, + sync: true, + cwd: sourcePath, + }, + fileList + ); return Promise.resolve(); } diff --git a/src/nni_manager/training_service/test/fileUtility.test.ts b/src/nni_manager/training_service/test/fileUtility.test.ts new file mode 100644 index 0000000000..5c056b8697 --- /dev/null +++ 
b/src/nni_manager/training_service/test/fileUtility.test.ts @@ -0,0 +1,133 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +'use strict'; +import * as assert from 'assert'; +import * as chai from 'chai'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as tar from 'tar'; +import { execCopydir, tarAdd, validateCodeDir } from '../common/util'; + +const deleteFolderRecursive = (filePath: string) => { + if (fs.existsSync(filePath)) { + fs.readdirSync(filePath).forEach((file, index) => { + const curPath = path.join(filePath, file); + if (fs.lstatSync(curPath).isDirectory()) { // recurse + deleteFolderRecursive(curPath); + } else { // delete file + fs.unlinkSync(curPath); + } + }); + fs.rmdirSync(filePath); + } +}; + +describe('fileUtility', () => { + /* + Test file utilities, includes: + - Copy directory + - Ignore with ignore file + - Add to tar + */ + + const sourceDir = 'test-fileUtilityTestSource'; + const destDir = 'test-fileUtilityTestDest'; + + beforeEach(() => { + fs.mkdirSync(sourceDir); + fs.writeFileSync(path.join(sourceDir, '.nniignore'), 'abc\nxyz'); + fs.writeFileSync(path.join(sourceDir, 'abc'), '123'); + fs.writeFileSync(path.join(sourceDir, 'abcd'), '1234'); + fs.mkdirSync(path.join(sourceDir, 'xyz')); + fs.mkdirSync(path.join(sourceDir, 'xyy')); + fs.mkdirSync(path.join(sourceDir, 'www')); + fs.mkdirSync(path.join(sourceDir, 'xx')); // empty dir + fs.writeFileSync(path.join(sourceDir, 'xyy', '.nniignore'), 'qq'); // nested nniignore + fs.writeFileSync(path.join(sourceDir, 'xyy', 'abc'), '123'); + fs.writeFileSync(path.join(sourceDir, 'xyy', 'qq'), '1234'); + fs.writeFileSync(path.join(sourceDir, 'xyy', 'pp'), '1234'); + fs.writeFileSync(path.join(sourceDir, 'www', '.nniignore'), 'pp'); // pop nniignore + fs.writeFileSync(path.join(sourceDir, 'www', 'abc'), '123'); + fs.writeFileSync(path.join(sourceDir, 'www', 'qq'), '1234'); + fs.writeFileSync(path.join(sourceDir, 'www', 'pp'), '1234'); + }); + + 
afterEach(() => { + deleteFolderRecursive(sourceDir); + deleteFolderRecursive(destDir); + if (fs.existsSync(`${destDir}.tar`)) { + fs.unlinkSync(`${destDir}.tar`); + } + }); + + it('Test file copy', async () => { + await execCopydir(sourceDir, destDir); + const existFiles = [ + 'abcd', + 'xyy', + 'xx', + path.join('xyy', '.nniignore'), + path.join('xyy', 'pp'), + path.join('www', '.nniignore'), + path.join('www', 'qq'), + ] + const notExistFiles = [ + 'abc', + 'xyz', + path.join('xyy', 'abc'), + path.join('xyy', 'qq'), + path.join('www', 'pp'), + path.join('www', 'abc'), + ] + existFiles.forEach(d => assert.ok(fs.existsSync(path.join(destDir, d)))); + notExistFiles.forEach(d => assert.ok(!fs.existsSync(path.join(destDir, d)))); + }); + + it('Test file copy without ignore', async () => { + fs.unlinkSync(path.join(sourceDir, '.nniignore')); + await execCopydir(sourceDir, destDir); + assert.ok(fs.existsSync(path.join(destDir, 'abcd'))); + assert.ok(fs.existsSync(path.join(destDir, 'abc'))); + assert.ok(fs.existsSync(path.join(destDir, 'xyz'))); + assert.ok(fs.existsSync(path.join(destDir, 'xyy'))); + assert.ok(fs.existsSync(path.join(destDir, 'xx'))); + }); + + it('Test tar file', async () => { + const tarPath = `${destDir}.tar`; + await tarAdd(tarPath, sourceDir); + assert.ok(fs.existsSync(tarPath)); + fs.mkdirSync(destDir); + tar.extract({ + file: tarPath, + cwd: destDir, + sync: true + }) + assert.ok(fs.existsSync(path.join(destDir, 'abcd'))); + assert.ok(!fs.existsSync(path.join(destDir, 'abc'))); + }); + + it('Validate code ok', async () => { + assert.doesNotThrow(async () => validateCodeDir(sourceDir)); + }); + + it('Validate code too many files', async () => { + for (let i = 0; i < 2000; ++i) + fs.writeFileSync(path.join(sourceDir, `${i}.txt`), 'a'); + try { + await validateCodeDir(sourceDir); + } catch (error) { + chai.expect(error.message).to.contains('many files'); + return; + } + chai.expect.fail(null, null, 'Did not fail.'); + }); + + it('Validate code too 
many files ok', async() => { + for (let i = 0; i < 2000; ++i) + fs.writeFileSync(path.join(sourceDir, `${i}.txt`), 'a'); + fs.writeFileSync(path.join(sourceDir, '.nniignore'), '*.txt'); + assert.doesNotThrow(async () => validateCodeDir(sourceDir)); + }); +}); diff --git a/src/nni_manager/yarn.lock b/src/nni_manager/yarn.lock index ab14b07403..afe17342f2 100644 --- a/src/nni_manager/yarn.lock +++ b/src/nni_manager/yarn.lock @@ -296,6 +296,13 @@ version "3.0.3" resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d" +"@types/minipass@*": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@types/minipass/-/minipass-2.2.0.tgz#51ad404e8eb1fa961f75ec61205796807b6f9651" + integrity sha512-wuzZksN4w4kyfoOv/dlpov4NOunwutLA/q7uc00xU02ZyUY+aoM5PWIXEKBMnm0NHd4a+N71BMjq+x7+2Af1fg== + dependencies: + "@types/node" "*" + "@types/mocha@^5.2.5": version "5.2.5" resolved "https://registry.yarnpkg.com/@types/mocha/-/mocha-5.2.5.tgz#8a4accfc403c124a0bafe8a9fc61a05ec1032073" @@ -442,6 +449,14 @@ dependencies: "@types/node" "*" +"@types/tar@^4.0.3": + version "4.0.3" + resolved "https://registry.yarnpkg.com/@types/tar/-/tar-4.0.3.tgz#e2cce0b8ff4f285293243f5971bd7199176ac489" + integrity sha512-Z7AVMMlkI8NTWF0qGhC4QIX0zkV/+y0J8x7b/RsHrN0310+YNjoJd8UrApCiGBCWtKjxS9QhNqLi2UJNToh5hA== + dependencies: + "@types/minipass" "*" + "@types/node" "*" + "@types/tmp@^0.0.33": version "0.0.33" resolved "https://registry.yarnpkg.com/@types/tmp/-/tmp-0.0.33.tgz#1073c4bc824754ae3d10cfab88ab0237ba964e4d" @@ -992,6 +1007,11 @@ chownr@^1.1.1, chownr@^1.1.2, chownr@^1.1.3: version "1.1.3" resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.3.tgz#42d837d5239688d55f303003a508230fa6727142" +chownr@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece" + integrity 
sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ== + ci-info@^1.5.0: version "1.6.0" resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-1.6.0.tgz#2ca20dbb9ceb32d4524a683303313f0304b1e497" @@ -1880,6 +1900,13 @@ fs-minipass@^1.2.5: dependencies: minipass "^2.2.1" +fs-minipass@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb" + integrity sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg== + dependencies: + minipass "^3.0.0" + fs-vacuum@^1.2.10, fs-vacuum@~1.2.10: version "1.2.10" resolved "https://registry.yarnpkg.com/fs-vacuum/-/fs-vacuum-1.2.10.tgz#b7629bec07a4031a2548fdf99f5ecf1cc8b31e36" @@ -2282,6 +2309,11 @@ ignore@^4.0.6: version "4.0.6" resolved "https://registry.yarnpkg.com/ignore/-/ignore-4.0.6.tgz#750e3db5862087b4737ebac8207ffd1ef27b25fc" +ignore@^5.1.4: + version "5.1.4" + resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.1.4.tgz#84b7b3dbe64552b6ef0eca99f6743dbec6d97adf" + integrity sha512-MzbUSahkTW1u7JpKKjY7LCARd1fU5W2rLdxlM4kdkayuCwZImjkpluF9CM1aLewYJguPDqewLam18Y6AU69A8A== + import-fresh@^3.0.0: version "3.2.1" resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.2.1.tgz#633ff618506e793af5ac91bf48b72677e15cbe66" @@ -2605,10 +2637,10 @@ istanbul-lib-source-maps@^4.0.0: istanbul-lib-coverage "^3.0.0" source-map "^0.6.1" -istanbul-reports@^3.0.0: - version "3.0.1" - resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.0.1.tgz#1343217244ad637e0c3b18e7f6b746941a9b5e9a" - integrity sha512-Vm9xwCiQ8t2cNNnckyeAV0UdxKpcQUz4nMxsBvIu8n2kmPSiyb5uaF/8LpmKr+yqL/MdOXaX2Nmdo4Qyxium9Q== +istanbul-reports@^3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.0.2.tgz#d593210e5000683750cb09fc0644e4b6e27fd53b" + integrity 
sha512-9tZvz7AiR3PEDNGiV9vIouQ/EAcqMXFmkcA1CDFTwOB98OZVDL0PH9glHotf5Ugp6GCOTypfzGWI/OqjWNCRUw== dependencies: html-escaper "^2.0.0" istanbul-lib-report "^3.0.0" @@ -3154,6 +3186,13 @@ minipass@^2.3.5, minipass@^2.8.6, minipass@^2.9.0: safe-buffer "^5.1.2" yallist "^3.0.0" +minipass@^3.0.0: + version "3.1.3" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.1.3.tgz#7d42ff1f39635482e15f9cdb53184deebd5815fd" + integrity sha512-Mgd2GdMVzY+x3IJ+oHnVM+KG3lA5c8tnabyJKmHSaG2kAGpudxuOf8ToDkhumF7UzME7DecbQE9uOZhNm7PuJg== + dependencies: + yallist "^4.0.0" + minizlib@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-1.1.0.tgz#11e13658ce46bc3a70a267aac58359d1e0c29ceb" @@ -3166,6 +3205,14 @@ minizlib@^1.2.1: dependencies: minipass "^2.9.0" +minizlib@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.0.tgz#fd52c645301ef09a63a2c209697c294c6ce02cf3" + integrity sha512-EzTZN/fjSvifSX0SlqUERCN39o6T40AMarPbv0MrarSFtIITCBh7bi+dU8nxGFHuqs9jdIAeoYoKuQAAASsPPA== + dependencies: + minipass "^3.0.0" + yallist "^4.0.0" + mississippi@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/mississippi/-/mississippi-3.0.0.tgz#ea0a3291f97e0b5e8776b363d5f0a12d94c67022" @@ -3188,6 +3235,11 @@ mkdirp@0.5.3, mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@~0.5.0, mkdirp@~0.5.1: dependencies: minimist "^1.2.5" +mkdirp@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e" + integrity sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw== + mocha@^7.1.1: version "7.1.1" resolved "https://registry.yarnpkg.com/mocha/-/mocha-7.1.1.tgz#89fbb30d09429845b1bb893a830bf5771049a441" @@ -3344,7 +3396,7 @@ node-pre-gyp@^0.10.3: semver "^5.3.0" tar "^4" -node-preload@^0.2.0: +node-preload@^0.2.1: version "0.2.1" resolved 
"https://registry.yarnpkg.com/node-preload/-/node-preload-0.2.1.tgz#c03043bb327f417a18fee7ab7ee57b408a144301" integrity sha512-RM5oyBy45cLEoHqCeh+MNuFAxO0vTFBLskvQbOKnEE7YTTSN4tbN8QWDIPQ6L+WvKsB/qLEGpYe2ZZ9d4W9OIQ== @@ -3641,9 +3693,9 @@ number-is-nan@^1.0.0: resolved "https://registry.yarnpkg.com/number-is-nan/-/number-is-nan-1.0.1.tgz#097b602b53422a522c1afb8790318336941a011d" nyc@^15.0.0: - version "15.0.0" - resolved "https://registry.yarnpkg.com/nyc/-/nyc-15.0.0.tgz#eb32db2c0f29242c2414fe46357f230121cfc162" - integrity sha512-qcLBlNCKMDVuKb7d1fpxjPR8sHeMVX0CHarXAVzrVWoFrigCkYR8xcrjfXSPi5HXM7EU78L6ywO7w1c5rZNCNg== + version "15.0.1" + resolved "https://registry.yarnpkg.com/nyc/-/nyc-15.0.1.tgz#bd4d5c2b17f2ec04370365a5ca1fc0ed26f9f93d" + integrity sha512-n0MBXYBYRqa67IVt62qW1r/d9UH/Qtr7SF1w/nQLJ9KxvWF6b2xCHImRAixHN9tnMMYHC2P14uo6KddNGwMgGg== dependencies: "@istanbuljs/load-nyc-config" "^1.0.0" "@istanbuljs/schema" "^0.1.2" @@ -3660,10 +3712,9 @@ nyc@^15.0.0: istanbul-lib-processinfo "^2.0.2" istanbul-lib-report "^3.0.0" istanbul-lib-source-maps "^4.0.0" - istanbul-reports "^3.0.0" - js-yaml "^3.13.1" + istanbul-reports "^3.0.2" make-dir "^3.0.0" - node-preload "^0.2.0" + node-preload "^0.2.1" p-map "^3.0.0" process-on-spawn "^1.0.0" resolve-from "^5.0.0" @@ -3671,7 +3722,6 @@ nyc@^15.0.0: signal-exit "^3.0.2" spawn-wrap "^2.0.0" test-exclude "^6.0.0" - uuid "^3.3.3" yargs "^15.0.2" oauth-sign@~0.8.2: @@ -4980,6 +5030,18 @@ tar@^4.4.10, tar@^4.4.12, tar@^4.4.13: safe-buffer "^5.1.2" yallist "^3.0.3" +tar@^6.0.2: + version "6.0.2" + resolved "https://registry.yarnpkg.com/tar/-/tar-6.0.2.tgz#5df17813468a6264ff14f766886c622b84ae2f39" + integrity sha512-Glo3jkRtPcvpDlAs/0+hozav78yoXKFr+c4wgw62NNMO3oo4AaJdCo21Uu7lcwr55h39W2XD1LMERc64wtbItg== + dependencies: + chownr "^2.0.0" + fs-minipass "^2.0.0" + minipass "^3.0.0" + minizlib "^2.1.0" + mkdirp "^1.0.3" + yallist "^4.0.0" + term-size@^1.2.0: version "1.2.0" resolved 
"https://registry.yarnpkg.com/term-size/-/term-size-1.2.0.tgz#458b83887f288fc56d6fffbfad262e26638efa69" @@ -5438,6 +5500,11 @@ yallist@^3.0.3: version "3.1.1" resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" +yallist@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" + integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== + yargs-parser@13.1.2, yargs-parser@^13.1.2: version "13.1.2" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-13.1.2.tgz#130f09702ebaeef2650d54ce6e3e5706f7a4fb38" From 0101f8874afb86fbca00a29f2c8083c9fd68550e Mon Sep 17 00:00:00 2001 From: Lijiaoa <61399850+Lijiaoa@users.noreply.github.com> Date: Mon, 22 Jun 2020 10:26:41 +0800 Subject: [PATCH 06/17] Support nested experiment more friendly (#2554) --- src/webui/src/components/Modals/Compare.tsx | 2 +- .../src/components/trial-detail/Duration.tsx | 2 +- .../src/components/trial-detail/Para.tsx | 168 +++++++++++------- 3 files changed, 107 insertions(+), 65 deletions(-) diff --git a/src/webui/src/components/Modals/Compare.tsx b/src/webui/src/components/Modals/Compare.tsx index e794b79e81..ac73f860bc 100644 --- a/src/webui/src/components/Modals/Compare.tsx +++ b/src/webui/src/components/Modals/Compare.tsx @@ -5,7 +5,7 @@ import IntermediateVal from '../public-child/IntermediateVal'; import { TRIALS } from '../../static/datamodel'; import { contentStyles, iconButtonStyles } from '../Buttons/ModalTheme'; import '../../static/style/compare.scss'; -import { TableRecord, Intermedia, TooltipForIntermediate } from '../../static/interface'; // eslint-disable-line no-unused-vars +import { TableRecord, Intermedia, TooltipForIntermediate } from '../../static/interface'; // the modal of trial compare interface CompareProps { diff --git a/src/webui/src/components/trial-detail/Duration.tsx 
b/src/webui/src/components/trial-detail/Duration.tsx index 6303563835..880678e31a 100644 --- a/src/webui/src/components/trial-detail/Duration.tsx +++ b/src/webui/src/components/trial-detail/Duration.tsx @@ -1,6 +1,6 @@ import * as React from 'react'; import ReactEcharts from 'echarts-for-react'; -import { TableObj, EventMap } from '../../static/interface'; // eslint-disable-line no-unused-vars +import { TableObj, EventMap } from '../../static/interface'; import { filterDuration, convertDuration } from '../../static/function'; import 'echarts/lib/chart/bar'; import 'echarts/lib/component/tooltip'; diff --git a/src/webui/src/components/trial-detail/Para.tsx b/src/webui/src/components/trial-detail/Para.tsx index abe3b21c60..0a4cd8b34e 100644 --- a/src/webui/src/components/trial-detail/Para.tsx +++ b/src/webui/src/components/trial-detail/Para.tsx @@ -2,8 +2,8 @@ import * as React from 'react'; import ReactEcharts from 'echarts-for-react'; import { filterByStatus } from '../../static/function'; import { EXPERIMENT } from '../../static/datamodel'; -import { Stack, PrimaryButton, Dropdown, IDropdownOption, } from 'office-ui-fabric-react'; // eslint-disable-line no-unused-vars -import { ParaObj, Dimobj, TableObj } from '../../static/interface'; // eslint-disable-line no-unused-vars +import { Stack, PrimaryButton, Dropdown, IDropdownOption } from 'office-ui-fabric-react'; +import { ParaObj, Dimobj, TableObj } from '../../static/interface'; import 'echarts/lib/chart/parallel'; import 'echarts/lib/component/tooltip'; import 'echarts/lib/component/title'; @@ -28,6 +28,8 @@ interface ParaState { // office-fabric-ui selectedItem?: { key: string | number | undefined }; // percent Selector swapyAxis?: string[]; // yAxis Selector + paraYdataNested: number[][]; + isNested: false; } interface ParaProps { @@ -68,7 +70,9 @@ class Para extends React.Component { succeedRenderCount: 10000000, clickCounts: 1, isLoadConfirm: false, - swapyAxis: [] + swapyAxis: [], + paraYdataNested: [], + 
isNested: false }; } @@ -79,23 +83,29 @@ class Para extends React.Component { lengthofTrials: number ): void => { // get data for every lines. if dim is choice type, number -> toString() - const paraYdata: number[][] = []; - Object.keys(eachTrialParams).map(item => { - const temp: number[] = []; - for (let i = 0; i < dimName.length; i++) { - if ('type' in parallelAxis[i]) { - temp.push(eachTrialParams[item][dimName[i]].toString()); - } else { - // default metric - temp.push(eachTrialParams[item][dimName[i]]); + let paraYdata: number[][] = []; + const { isNested } = this.state; + if (isNested === false) { + for (const item of eachTrialParams) { + const temp: number[] = []; + for (let i = 0; i < dimName.length; i++) { + if ('type' in parallelAxis[i]) { + temp.push(item[dimName[i]].toString()); + } else { + // default metric + temp.push(item[dimName[i]]); + } } + paraYdata.push(temp); } - paraYdata.push(temp); - }); + } else { + paraYdata = this.state.paraYdataNested; + } // add acc Object.keys(paraYdata).map(item => { paraYdata[item].push(accPara[item]); }); + // according acc to sort ydata // sort to find top percent dataset if (paraYdata.length !== 0) { const len = paraYdata[0].length - 1; @@ -133,7 +143,7 @@ class Para extends React.Component { const lenOfDataSource: number = dataSource.length; const accPara: number[] = []; // specific value array - const eachTrialParams: string[] = []; + const eachTrialParams: Array = []; // experiment interface search space obj const searchRange = searchSpace !== undefined ? 
JSON.parse(searchSpace) : ''; // nest search space @@ -144,13 +154,15 @@ class Para extends React.Component { return; } }); - const dimName = Object.keys(searchRange); - this.setState({ dimName: dimName }); - + let dimName: string[] = []; const parallelAxis: Array = []; // search space range and specific value [only number] let i = 0; + const yAxisOrderList = new Map(); + this.setState({ isNested: isNested }); if (isNested === false) { + dimName = Object.keys(searchRange); + this.setState({ dimName: dimName }); for (i; i < dimName.length; i++) { const data: string[] = []; const searchKey = searchRange[dimName[i]]; @@ -223,37 +235,25 @@ class Para extends React.Component { } } } else { - for (i; i < dimName.length; i++) { - const searchKey = searchRange[dimName[i]]; - const data: string[] = []; - let j = 0; - switch (searchKey._type) { - case 'choice': - for (j; j < searchKey._value.length; j++) { - const item = searchKey._value[j]; - Object.keys(item).map(key => { - if (key !== '_name' && key !== '_type') { - Object.keys(item[key]).map(index => { - if (index !== '_type') { - const realChoice = item[key][index]; - Object.keys(realChoice).map(m => { - data.push(`${item._name}_${realChoice[m]}`); - }); - } - }); - } - }); + for (const parallelAxisName in searchRange) { + const data: any[] = []; + dimName.push(parallelAxisName); + + for (const choiceItem in searchRange[parallelAxisName]) { + if (choiceItem === '_value') { + for (const item in searchRange[parallelAxisName][choiceItem]) { + data.push(searchRange[parallelAxisName][choiceItem][item]._name); } - data.push('null'); + yAxisOrderList.set(parallelAxisName, JSON.parse(JSON.stringify(data))); parallelAxis.push({ dim: i, - name: dimName[i], - type: 'category', data: data, + name: parallelAxisName, + type: 'category', boundaryGap: true, axisLine: { lineStyle: { - type: 'dotted', // axis type,solid dashed dotted + type: 'dotted', // axis type,solid,dashed,dotted width: 1 } }, @@ -266,16 +266,44 @@ class Para extends 
React.Component { show: true, interval: 0, // rotate: 30 - }, - }); - break; - default: - parallelAxis.push({ - dim: i, - name: dimName[i] + } }); + i++; + for (const item in searchRange[parallelAxisName][choiceItem]) { + for (const key in searchRange[parallelAxisName][choiceItem][item]) { + if (key !== '_name') { + dimName.push(key); + parallelAxis.push({ + dim: i, + data: searchRange[parallelAxisName][choiceItem][item][key]._value.concat('null'), + name: `${searchRange[parallelAxisName][choiceItem][item]._name}_${key}`, + type: 'category', + boundaryGap: true, + axisLine: { + lineStyle: { + type: 'dotted', // axis type,solid,dashed,dotted + width: 1 + } + }, + axisTick: { + show: true, + interval: 0, + alignWithLabel: true, + }, + axisLabel: { + show: true, + interval: 0, + // rotate: 30 + } + }); + i++; + } + } + } + } } } + this.setState({ dimName: dimName }); } parallelAxis.push({ dim: i, @@ -291,6 +319,7 @@ class Para extends React.Component { tooltip: { trigger: 'item' }, + parallel: { parallelAxisDefault: { tooltip: { @@ -332,7 +361,7 @@ class Para extends React.Component { } else { Object.keys(dataSource).map(item => { const trial = dataSource[item]; - eachTrialParams.push(trial.description.parameters || ''); + eachTrialParams.push(trial.description.parameters); // may be a succeed trial hasn't final result // all detail page may be break down if havn't if if (trial.acc !== undefined) { @@ -341,22 +370,35 @@ class Para extends React.Component { } } }); - // nested search space, deal data + // nested search space, fill all yAxis data if (isNested !== false) { - eachTrialParams.forEach(element => { - Object.keys(element).forEach(key => { - const item = element[key]; - if (typeof item === 'object') { - Object.keys(item).forEach(index => { - if (index !== '_name') { - element[key] = `${item._name}_${item[index]}`; + const renderDataSource: Array = []; + for (const i in eachTrialParams) { + const eachTrialData: Array = []; + for (const m in eachTrialParams[i]) 
{ + const eachTrialParamsObj = eachTrialParams[i][m]; + for (const n in yAxisOrderList.get(m)) { + if (yAxisOrderList.get(m)[n] === eachTrialParamsObj._name) { + for (const index in eachTrialParamsObj) { + if (index !== '_name') { + eachTrialData.push(eachTrialParamsObj[index].toString()); + } + if (eachTrialParamsObj[index] === 'Empty') { + eachTrialData.push('Empty'); + } + } + } else { + if (yAxisOrderList.get(m)[n] === 'Empty') { + eachTrialData.push(eachTrialParamsObj._name.toString()); } else { - element[key] = 'null'; + eachTrialData.push('null'); } - }); + } } - }); - }); + } + renderDataSource.push(eachTrialData); + } + this.setState({ paraYdataNested: renderDataSource }); } // if not return final result const maxVal = accPara.length === 0 ? 1 : Math.max(...accPara); @@ -592,7 +634,7 @@ class Para extends React.Component { } componentDidUpdate(prevProps: ParaProps): void { - if(this.props.dataSource !== prevProps.dataSource) { + if (this.props.dataSource !== prevProps.dataSource) { const { dataSource, expSearchSpace, whichGraph } = this.props; if (whichGraph === 'Hyper-parameter') { this.hyperParaPic(dataSource, expSearchSpace); From 76586fcaf3ea3265ff9e74d1d88ebe98b8dcc7a4 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Mon, 22 Jun 2020 10:28:44 +0800 Subject: [PATCH 07/17] Check eth0 in nnictl (#2566) --- deployment/pypi/setup.py | 1 + setup.py | 1 + tools/nni_cmd/launcher_utils.py | 10 ++++++++++ tools/setup.py | 3 ++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/deployment/pypi/setup.py b/deployment/pypi/setup.py index 3c2d433790..48a7176e04 100644 --- a/deployment/pypi/setup.py +++ b/deployment/pypi/setup.py @@ -58,6 +58,7 @@ 'PythonWebHDFS', 'hyperopt==0.1.2', 'json_tricks', + 'netifaces', 'numpy', 'scipy', 'coverage', diff --git a/setup.py b/setup.py index 8a3733776f..a4be914036 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ def read(fname): 'astor', 'hyperopt==0.1.2', 'json_tricks', + 'netifaces', 'numpy', 'psutil', 
'ruamel.yaml', diff --git a/tools/nni_cmd/launcher_utils.py b/tools/nni_cmd/launcher_utils.py index 5fbd9bf176..8be9a8bf0e 100644 --- a/tools/nni_cmd/launcher_utils.py +++ b/tools/nni_cmd/launcher_utils.py @@ -3,6 +3,7 @@ import os import json +import netifaces from schema import SchemaError from schema import Schema from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, PAI_YARN_CONFIG_SCHEMA, \ @@ -297,10 +298,19 @@ def validate_pai_trial_conifg(experiment_config): print_warning(warning_information.format('outputDir')) validate_pai_config_path(experiment_config) +def validate_eth0_device(experiment_config): + '''validate whether the machine has eth0 device''' + if experiment_config.get('trainingServicePlatform') not in ['local'] \ + and not experiment_config.get('nniManagerIp') \ + and 'eth0' not in netifaces.interfaces(): + print_error('This machine does not contain eth0 network device, please set nniManagerIp in config file!') + exit(1) + def validate_all_content(experiment_config, config_path): '''Validate whether experiment_config is valid''' parse_path(experiment_config, config_path) validate_common_content(experiment_config) + validate_eth0_device(experiment_config) validate_pai_trial_conifg(experiment_config) experiment_config['maxExecDuration'] = parse_time(experiment_config['maxExecDuration']) if experiment_config.get('advisor'): diff --git a/tools/setup.py b/tools/setup.py index d5e527e596..3c0513e607 100644 --- a/tools/setup.py +++ b/tools/setup.py @@ -16,7 +16,8 @@ 'astor', 'schema', 'PythonWebHDFS', - 'colorama' + 'colorama', + 'netifaces' ], author = 'Microsoft NNI Team', From a17632b07a196ee20615941c80ed2e9fc504d711 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Mon, 22 Jun 2020 10:57:11 +0800 Subject: [PATCH 08/17] Increase timeout for yarn (#2572) --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 92080252c4..b2bae68d72 100644 --- 
a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -108,6 +108,7 @@ jobs: echo "##vso[task.setvariable variable=PATH]${HOME}/Library/Python/3.7/bin:${PATH}" displayName: 'Install python tools' - script: | + echo "network-timeout 600000" >> ${HOME}/.yarnrc source install.sh displayName: 'Install nni toolkit via source code' - script: | From a264d9add1aec64e7d926c0909ebedbaa98e8e32 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Mon, 22 Jun 2020 18:48:04 +0800 Subject: [PATCH 09/17] Fix pylint error (#2585) --- src/sdk/pynni/nni/bohb_advisor/config_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdk/pynni/nni/bohb_advisor/config_generator.py b/src/sdk/pynni/nni/bohb_advisor/config_generator.py index 5bd039844f..c6a13c6b35 100644 --- a/src/sdk/pynni/nni/bohb_advisor/config_generator.py +++ b/src/sdk/pynni/nni/bohb_advisor/config_generator.py @@ -236,7 +236,7 @@ def get_config(self, budget): return sample def impute_conditional_data(self, array): - return_array = np.empty_like(array) + return_array = np.zeros(array.shape) for i in range(array.shape[0]): datum = np.copy(array[i]) nan_indices = np.argwhere(np.isnan(datum)).flatten() From 0f7f94601953c8e4446181815abeeebe5fe22f84 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Tue, 23 Jun 2020 01:11:38 +0800 Subject: [PATCH 10/17] Add pipeline for tfv2 (#2584) --- azure-pipelines.yml | 38 +++--- src/sdk/pynni/tests/test_compressor.py | 2 +- test/config/integration_tests_tf2.yml | 159 ++++++++++++++++++++++ test/pipelines/pipelines-it-local-tf2.yml | 36 +++++ 4 files changed, 215 insertions(+), 20 deletions(-) create mode 100644 test/config/integration_tests_tf2.yml create mode 100644 test/pipelines/pipelines-it-local-tf2.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b2bae68d72..dc4206864d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -10,7 +10,6 @@ jobs: - script: | set 
-e python3 -m pip install --upgrade pip setuptools --user - python3 -m pip install pylint==2.3.1 astroid==2.2.5 --user python3 -m pip install coverage --user echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}" displayName: 'Install python tools' @@ -28,29 +27,14 @@ jobs: - script: | set -e python3 -m pip install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html --user - python3 -m pip install tensorflow==1.15.2 --user - python3 -m pip install keras==2.1.6 --user + python3 -m pip install tensorflow==2.2.0 --user + python3 -m pip install keras==2.4.2 --user python3 -m pip install gym onnx --user python3 -m pip install sphinx==1.8.3 sphinx-argparse==0.2.5 sphinx-markdown-tables==0.0.9 sphinx-rtd-theme==0.4.2 sphinxcontrib-websupport==1.1.0 recommonmark==0.5.0 --user sudo apt-get install swig -y nnictl package install --name=SMAC nnictl package install --name=BOHB displayName: 'Install dependencies' - - script: | - set -e - python3 -m pylint --rcfile pylintrc nni_annotation - python3 -m pylint --rcfile pylintrc nni_cmd - python3 -m pylint --rcfile pylintrc nni_gpu_tool - python3 -m pylint --rcfile pylintrc nni_trial_tool - python3 -m pylint --rcfile pylintrc nni - python3 -m pylint --rcfile pylintrc nnicli - displayName: 'Run pylint' - - script: | - set -e - python3 -m pip install flake8 --user - EXCLUDES=./src/nni_manager/,./src/webui,./tools/nni_annotation/testcase/,./examples/trials/mnist-nas/*/mnist*.py,./examples/trials/nas_cifar10/src/cifar10/general_child.py - python3 -m flake8 . --count --exclude=$EXCLUDES --select=E9,F63,F72,F82 --show-source --statistics - displayName: 'Run flake8 tests to find Python syntax errors and undefined names' - script: | cd test source scripts/unittest.sh @@ -64,7 +48,7 @@ jobs: sphinx-build -M html . 
_build -W displayName: 'Sphinx Documentation Build check' -- job: 'ubuntu_1604_python35_legacy_torch' +- job: 'ubuntu_1604_python35_legacy_torch_tf' pool: vmImage: 'Ubuntu 16.04' @@ -72,6 +56,7 @@ jobs: - script: | set -e python3 -m pip install --upgrade pip setuptools --user + python3 -m pip install pylint==2.3.1 astroid==2.2.5 --user python3 -m pip install coverage --user echo "##vso[task.setvariable variable=PATH]${HOME}/.local/bin:${PATH}" displayName: 'Install python tools' @@ -88,6 +73,21 @@ jobs: nnictl package install --name=SMAC nnictl package install --name=BOHB displayName: 'Install dependencies' + - script: | + set -e + python3 -m pylint --rcfile pylintrc nni_annotation + python3 -m pylint --rcfile pylintrc nni_cmd + python3 -m pylint --rcfile pylintrc nni_gpu_tool + python3 -m pylint --rcfile pylintrc nni_trial_tool + python3 -m pylint --rcfile pylintrc nni + python3 -m pylint --rcfile pylintrc nnicli + displayName: 'Run pylint' + - script: | + set -e + python3 -m pip install flake8 --user + EXCLUDES=./src/nni_manager/,./src/webui,./tools/nni_annotation/testcase/,./examples/trials/mnist-nas/*/mnist*.py,./examples/trials/nas_cifar10/src/cifar10/general_child.py + python3 -m flake8 . 
--count --exclude=$EXCLUDES --select=E9,F63,F72,F82 --show-source --statistics + displayName: 'Run flake8 tests to find Python syntax errors and undefined names' - script: | cd test source scripts/unittest.sh diff --git a/src/sdk/pynni/tests/test_compressor.py b/src/sdk/pynni/tests/test_compressor.py index e6603e4d4f..7641ae7d25 100644 --- a/src/sdk/pynni/tests/test_compressor.py +++ b/src/sdk/pynni/tests/test_compressor.py @@ -143,7 +143,7 @@ def test_torch_fpgm_pruner(self): @tf2 def test_tf_fpgm_pruner(self): - w = np.array([np.ones((5, 5, 5)) * (i+1) for i in range(10)]).astype(np.float32) + w = np.array([np.ones((5, 3, 3)) * (i+1) for i in range(10)]).astype(np.float32) model = get_tf_model() config_list = [{'sparsity': 0.2, 'op_types': ['Conv2D']}] diff --git a/test/config/integration_tests_tf2.yml b/test/config/integration_tests_tf2.yml new file mode 100644 index 0000000000..1c3e375990 --- /dev/null +++ b/test/config/integration_tests_tf2.yml @@ -0,0 +1,159 @@ + +defaultTestCaseConfig: + launchCommand: nnictl create --config $configFile --debug + stopCommand: nnictl stop + experimentStatusCheck: True + platform: linux darwin win32 + +testCases: +####################################################################### +# nni examples test +####################################################################### +- name: sklearn-classification + # test case config yml file relative to nni source code directory + configFile: test/config/examples/sklearn-classification.yml + + # test case specific config, the content of configFile will be overrided + # by config section + config: + + # validator is called after experiment is done + # validator class needs to be implemented in nni_test/nnitest/validators.py + validator: + + # launch command, default launch command is 'nnictl create --config $configFile' + launchCommand: nnictl create --config $configFile --debug + + # stop command, default stop command is 'nnictl stop', empty means no stop command + stopCommand: 
nnictl stop + + # set experiment ID into variable, variable name should start with $, such as $expId + setExperimentIdtoVar: $expId + + # check status of experiment before calling validator + experimentStatusCheck: True + +- name: sklearn-regression + configFile: test/config/examples/sklearn-regression.yml + +- name: mnist-pytorch + configFile: test/config/examples/mnist-pytorch.yml + +- name: cifar10-pytorch + configFile: test/config/examples/cifar10-pytorch.yml + config: + # this example downloads large pretrained model weights + # test 1 trial to save time + maxExecDuration: 10m + maxTrialNum: 1 + trialConcurrency: 1 + trial: + command: python3 main.py --epochs 1 --batches 1 + gpuNum: 0 + +######################################################################### +# nni features test +######################################################################### +- name: metrics-float + configFile: test/config/metrics_test/config.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: + class: MetricsValidator + kwargs: + expected_result_file: expected_metrics.json + +- name: export-float + configFile: test/config/metrics_test/config.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: + class: ExportValidator + +- name: metrics-dict + configFile: test/config/metrics_test/config_dict_metrics.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: + class: MetricsValidator + kwargs: + expected_result_file: expected_metrics_dict.json + +- name: export-dict + configFile: test/config/metrics_test/config_dict_metrics.yml + config: + maxTrialNum: 1 + trialConcurrency: 1 + validator: + class: ExportValidator + +- name: nnicli + configFile: test/config/examples/sklearn-regression.yml + config: + maxTrialNum: 4 + trialConcurrency: 4 + launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' + stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' + validator: + class: NnicliValidator + platform: linux darwin + +- name: 
foreground + configFile: test/config/examples/sklearn-regression.yml + launchCommand: python3 nni_test/nnitest/foreground.py --config $configFile --timeout 45 + stopCommand: + experimentStatusCheck: False + platform: linux darwin + +# Experiment resume test part 1 +- name: nnictl-resume-1 + configFile: test/config/examples/sklearn-regression.yml + setExperimentIdtoVar: $resumeExpId + +# Experiment resume test part 2 +- name: nnictl-resume-2 + configFile: test/config/examples/sklearn-regression.yml + launchCommand: nnictl resume $resumeExpId + +# Experiment view test +- name: nnictl-view + configFile: test/config/examples/sklearn-regression.yml + launchCommand: nnictl view $resumeExpId + experimentStatusCheck: False + +- name: multi-thread + configFile: test/config/multi_thread/config.yml + +- name: multi-phase-batch + configFile: test/config/multi_phase/batch.yml + config: + # for batch tuner, maxTrialNum can not exceed length of search space + maxTrialNum: 2 + trialConcurrency: 2 + +- name: multi-phase-evolution + configFile: test/config/multi_phase/evolution.yml + +- name: multi-phase-grid + configFile: test/config/multi_phase/grid.yml + config: + maxTrialNum: 2 + trialConcurrency: 2 + +- name: multi-phase-metis + configFile: test/config/multi_phase/metis.yml + +- name: multi-phase-tpe + configFile: test/config/multi_phase/tpe.yml + +######################################################################### +# nni assessor test +######################################################################### +- name: assessor-curvefitting + configFile: test/config/assessors/curvefitting.yml + +- name: assessor-medianstop + configFile: test/config/assessors/medianstop.yml diff --git a/test/pipelines/pipelines-it-local-tf2.yml b/test/pipelines/pipelines-it-local-tf2.yml new file mode 100644 index 0000000000..95421a8ce4 --- /dev/null +++ b/test/pipelines/pipelines-it-local-tf2.yml @@ -0,0 +1,36 @@ +jobs: +- job: 'integration_test_local_ubuntu' + timeoutInMinutes: 120 + + 
steps: + - script: python3 -m pip install --upgrade pip setuptools --user + displayName: 'Install python tools' + - script: | + source install.sh + displayName: 'Install nni toolkit via source code' + - script: | + set -e + python3 -m pip install scikit-learn==0.20.0 --user + python3 -m pip install torch==1.3.1 torchvision==0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user + python3 -m pip install tensorflow-gpu==2.2.0 --user + python3 -m pip install keras==2.4.2 --user + sudo apt-get install swig -y + PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC + PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB + displayName: 'Install dependencies for integration tests' + - script: | + cd test + source scripts/unittest.sh + displayName: 'Unit test' + - script: | + cd test + PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests_tf2.yml --ts local + displayName: 'Integration test' + - script: | + cd test + PATH=$HOME/.local/bin:$PATH source scripts/nas.sh + displayName: 'NAS test' + - script: | + cd test + source scripts/model_compression.sh + displayName: 'Model compression test' From a5764016739676a55e5e10ae88e081ebf94d2a38 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Tue, 23 Jun 2020 13:27:41 +0800 Subject: [PATCH 11/17] Install builtin tuners (#2439) --- README.md | 1 + deployment/pypi/setup.py | 3 +- docs/en_US/Tuner/InstallCustomizedTuner.md | 54 +++ docs/en_US/Tutorial/InstallCustomizedAlgos.md | 164 +++++++ docs/en_US/Tutorial/Nnictl.md | 84 +++- docs/en_US/_templates/index.html | 1 + docs/en_US/hpo_advanced.rst | 2 + examples/tuners/customized_tuner/README.md | 3 + .../customized_tuner/demo_tuner/__init__.py | 1 + .../customized_tuner/demo_tuner/demo_tuner.py | 35 ++ examples/tuners/customized_tuner/setup.py | 24 ++ setup.py | 3 +- .../rest_server/restValidationSchemas.ts | 6 +- src/sdk/pynni/nni/__init__.py | 1 + 
src/sdk/pynni/nni/__main__.py | 59 +-- .../pynni/nni/bohb_advisor/bohb_advisor.py | 17 +- src/sdk/pynni/nni/constants.py | 138 +++--- .../curvefitting_assessor.py | 11 + .../nni/evolution_tuner/evolution_tuner.py | 9 + src/sdk/pynni/nni/gp_tuner/gp_tuner.py | 15 + .../hyperband_advisor/hyperband_advisor.py | 10 + .../nni/hyperopt_tuner/hyperopt_tuner.py | 9 + .../medianstop_assessor.py | 10 + src/sdk/pynni/nni/metis_tuner/metis_tuner.py | 11 + .../networkmorphism_tuner.py | 13 +- src/sdk/pynni/nni/package_utils.py | 373 ++++++++++++++++ src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py | 11 + src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py | 17 + src/sdk/pynni/nni/smac_tuner/smac_tuner.py | 9 + src/sdk/pynni/nni/utils.py | 63 ++- test/scripts/unittest.sh | 6 + tools/nni_cmd/command_utils.py | 10 +- tools/nni_cmd/common_utils.py | 69 +-- tools/nni_cmd/config_schema.py | 401 +++++++++--------- tools/nni_cmd/constants.py | 40 +- tools/nni_cmd/launcher.py | 22 +- tools/nni_cmd/launcher_utils.py | 231 +--------- tools/nni_cmd/nnictl.py | 15 +- tools/nni_cmd/nnictl_utils.py | 3 +- tools/nni_cmd/package_management.py | 184 +++++++- tools/nni_cmd/tensorboard_utils.py | 7 +- .../config_files/invalid/custom-tuner-1.yml | 25 ++ .../config_files/invalid/custom-tuner-2.yml | 27 ++ .../tests/config_files/invalid/mytuner.py | 5 + .../tests/config_files/invalid/no-tuner.yml | 21 + .../config_files/invalid/search_space.json | 6 + .../config_files/invalid/searchspace-path.yml | 24 ++ .../config_files/invalid/tuner-wrong-key.yml | 24 ++ .../config_files/invalid/wrong-class-args.yml | 23 + .../invalid/wrong-training-service.yml | 23 + .../nni_cmd/tests/config_files/valid/main.py | 1 + .../config_files/valid/search_space.json | 6 + .../nni_cmd/tests/config_files/valid/test.yml | 22 + tools/nni_cmd/tests/test_config_validation.py | 31 ++ 54 files changed, 1744 insertions(+), 639 deletions(-) create mode 100644 docs/en_US/Tuner/InstallCustomizedTuner.md create mode 100644 
docs/en_US/Tutorial/InstallCustomizedAlgos.md create mode 100644 examples/tuners/customized_tuner/README.md create mode 100644 examples/tuners/customized_tuner/demo_tuner/__init__.py create mode 100644 examples/tuners/customized_tuner/demo_tuner/demo_tuner.py create mode 100644 examples/tuners/customized_tuner/setup.py create mode 100644 src/sdk/pynni/nni/package_utils.py create mode 100644 tools/nni_cmd/tests/config_files/invalid/custom-tuner-1.yml create mode 100644 tools/nni_cmd/tests/config_files/invalid/custom-tuner-2.yml create mode 100644 tools/nni_cmd/tests/config_files/invalid/mytuner.py create mode 100644 tools/nni_cmd/tests/config_files/invalid/no-tuner.yml create mode 100644 tools/nni_cmd/tests/config_files/invalid/search_space.json create mode 100644 tools/nni_cmd/tests/config_files/invalid/searchspace-path.yml create mode 100644 tools/nni_cmd/tests/config_files/invalid/tuner-wrong-key.yml create mode 100644 tools/nni_cmd/tests/config_files/invalid/wrong-class-args.yml create mode 100644 tools/nni_cmd/tests/config_files/invalid/wrong-training-service.yml create mode 100644 tools/nni_cmd/tests/config_files/valid/main.py create mode 100644 tools/nni_cmd/tests/config_files/valid/search_space.json create mode 100644 tools/nni_cmd/tests/config_files/valid/test.yml create mode 100644 tools/nni_cmd/tests/test_config_validation.py diff --git a/README.md b/README.md index 003df29783..10c3391fe7 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,7 @@ Within the following table, we summarized the current NNI capabilities, we are g diff --git a/deployment/pypi/setup.py b/deployment/pypi/setup.py index 48a7176e04..3c8bd4429f 100644 --- a/deployment/pypi/setup.py +++ b/deployment/pypi/setup.py @@ -63,7 +63,8 @@ 'scipy', 'coverage', 'colorama', - 'scikit-learn>=0.20,<0.22' + 'scikit-learn>=0.20,<0.22', + 'pkginfo' ], classifiers = [ 'Programming Language :: Python :: 3', diff --git a/docs/en_US/Tuner/InstallCustomizedTuner.md 
b/docs/en_US/Tuner/InstallCustomizedTuner.md new file mode 100644 index 0000000000..f24c0248b7 --- /dev/null +++ b/docs/en_US/Tuner/InstallCustomizedTuner.md @@ -0,0 +1,54 @@ +# How to install customized tuner as a builtin tuner + +You can follow the steps below to install a customized tuner in `nni/examples/tuners/customized_tuner` as a builtin tuner. + +## Prepare installation source and install package + +There are 2 options to install this customized tuner: + +### Option 1: install from directory + +Step 1: From `nni/examples/tuners/customized_tuner` directory, run: + +`python setup.py develop` + +This command will build the `nni/examples/tuners/customized_tuner` directory as a pip installation source. + +Step 2: Run command: + +`nnictl package install ./` + +### Option 2: install from whl file + +Step 1: From `nni/examples/tuners/customized_tuner` directory, run: + +`python setup.py bdist_wheel` + +This command builds a whl file which is a pip installation source. + +Step 2: Run command: + +`nnictl package install dist/demo_tuner-0.1-py3-none-any.whl` + +## Check the installed package + +Then run the command `nnictl package list`; you should be able to see that demotuner is installed: +``` ++-----------------+------------+-----------+----------------------+------------------------------------------+ +| Name | Type | Installed | Class Name | Module Name | ++-----------------+------------+-----------+----------------------+------------------------------------------+ +| demotuner | tuners | Yes | DemoTuner | demo_tuner | ++-----------------+------------+-----------+----------------------+------------------------------------------+ +``` + +## Use the installed tuner in experiment + +Now you can use the demotuner in the experiment configuration file the same way as other builtin tuners: + +```yaml +tuner: + builtinTunerName: demotuner + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +``` diff --git a/docs/en_US/Tutorial/InstallCustomizedAlgos.md
b/docs/en_US/Tutorial/InstallCustomizedAlgos.md new file mode 100644 index 0000000000..63b61227f1 --- /dev/null +++ b/docs/en_US/Tutorial/InstallCustomizedAlgos.md @@ -0,0 +1,164 @@ +**How to install customized algorithms as builtin tuners, assessors and advisors** +=== + +## Overview + +NNI provides a lot of [builtin tuners](../Tuner/BuiltinTuner.md), [advisors](../Tuner/BuiltinTuner.md#Hyperband) and [assessors](../Assessor/BuiltinAssessor.md) that can be used directly for Hyper Parameter Optimization, and some extra algorithms can be installed via `nnictl package install --name <name>` after NNI is installed. You can check these extra algorithms via the `nnictl package list` command. + +NNI also provides the ability to build your own customized tuners, advisors and assessors. To use the customized algorithm, users can simply follow the spec in the experiment config file to properly reference the algorithm, which has been illustrated in the tutorials of [customized tuners](../Tuner/CustomizeTuner.md)/[advisors](../Tuner/CustomizeAdvisor.md)/[assessors](../Assessor/CustomizeAssessor.md). + +NNI also allows users to install the customized algorithm as a builtin algorithm, in order for users to use the algorithm in the same way as NNI builtin tuners/advisors/assessors. More importantly, it becomes much easier for users to share or distribute their implemented algorithm to others. Customized tuners/advisors/assessors can be installed into NNI as builtin algorithms; once they are installed into NNI, you can use your customized algorithms the same way as builtin tuners/advisors/assessors in your experiment configuration file.
For example, you built a customized tuner and installed it into NNI using a builtin name `mytuner`, then you can use this tuner in your configuration file like below: +```yaml +tuner: + builtinTunerName: mytuner +``` + +## Install customized algorithms as builtin tuners, assessors and advisors +You can follow below steps to build a customized tuner/assessor/advisor, and install it into NNI as builtin algorithm. + +### 1. Create a customized tuner/assessor/advisor +Reference following instructions to create: +* [customized tuner](../Tuner/CustomizeTuner.md) +* [customized assessor](../Assessor/CustomizeAssessor.md) +* [customized advisor](../Tuner/CustomizeAdvisor.md) + +### 2. (Optional) Create a validator to validate classArgs +NNI provides a `ClassArgsValidator` interface for customized algorithms author to validate the classArgs parameters in experiment configuration file which are passed to customized algorithms constructors. +The `ClassArgsValidator` interface is defined as: +```python +class ClassArgsValidator(object): + def validate_class_args(self, **kwargs): + """ + The classArgs fields in experiment configuration are packed as a dict and + passed to validator as kwargs. + """ + pass +``` +For example, you can implement your validator such as: +```python +from schema import Schema, Optional +from nni import ClassArgsValidator + +class MedianstopClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('start_step'): self.range('start_step', int, 0, 9999), + }).validate(kwargs) +``` +The validator will be invoked before experiment is started to check whether the classArgs fields are valid for your customized algorithms. + +### 3. 
Prepare package installation source +In order to be installed as builtin tuners, assessors and advisors, the customized algorithms need to be packaged as an installable source which can be recognized by the `pip` command; under the hood, NNI calls the `pip` command to install the package. +Besides being a common pip source, the package needs to provide meta information in the `classifiers` field. +The format of the classifiers field is as follows: +``` +NNI Package :: <type> :: <builtin name> :: <full class name of tuner> :: <full class name of class args validator> +``` +* `type`: type of algorithms, could be one of `tuner`, `assessor`, `advisor` +* `builtin name`: builtin name used in experiment configuration file +* `full class name of tuner`: tuner class name, including its module name, for example: `demo_tuner.DemoTuner` +* `full class name of class args validator`: class args validator class name, including its module name, for example: `demo_tuner.MyClassArgsValidator` + +Following is an example of classifiers in the package's `setup.py`: + +```python + classifiers = [ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: MIT License', + 'Operating System :: ', + 'NNI Package :: tuner :: demotuner :: demo_tuner.DemoTuner :: demo_tuner.MyClassArgsValidator' + ], +``` + +Once you have the meta info in `setup.py`, you can build your pip installation source via: +* Run command `python setup.py develop` from the package directory; this command will build the directory as a pip installation source. +* Run command `python setup.py bdist_wheel` from the package directory; this command builds a whl file which is a pip installation source. + +NNI will look for the classifier that starts with `NNI Package` to retrieve the package meta information while the package is being installed with the `nnictl package install <installation source>` command. + +Reference [customized tuner example](https://github.com/microsoft/nni/blob/master/examples/tuners/customized_tuner/README.md) for a full example. + +### 4.
Install customized algorithms package into NNI + +If your installation source is prepared as a directory with `python setup.py develop`, you can install the package with the following command: + +`nnictl package install <installation source directory>` + +For example: + +`nnictl package install nni/examples/tuners/customized_tuner/` + +If your installation source is prepared as a whl file with `python setup.py bdist_wheel`, you can install the package with the following command: + +`nnictl package install <whl file path>` + +For example: + +`nnictl package install nni/examples/tuners/customized_tuner/dist/demo_tuner-0.1-py3-none-any.whl` + +### 5. Use the installed builtin algorithms in experiment +Once your customized algorithm is installed, you can use it in the experiment configuration file the same way as other builtin tuners/assessors/advisors, for example: + +```yaml +tuner: + builtinTunerName: demotuner + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +``` + + +## Manage packages using `nnictl package` + +### List installed packages + +Run the following command to list the installed packages: + +``` +nnictl package list ++-----------------+------------+-----------+----------------------+------------------------------------------+ +| Name | Type | Installed | Class Name | Module Name | ++-----------------+------------+-----------+----------------------+------------------------------------------+ +| demotuner | tuners | Yes | DemoTuner | demo_tuner | +| SMAC | tuners | No | SMACTuner | nni.smac_tuner.smac_tuner | +| PPOTuner | tuners | No | PPOTuner | nni.ppo_tuner.ppo_tuner | +| BOHB | advisors | Yes | BOHB | nni.bohb_advisor.bohb_advisor | ++-----------------+------------+-----------+----------------------+------------------------------------------+ +``` + +Run the following command to list all packages, including the builtin packages that cannot be uninstalled.
+ +``` +nnictl package list --all ++-----------------+------------+-----------+----------------------+------------------------------------------+ +| Name | Type | Installed | Class Name | Module Name | ++-----------------+------------+-----------+----------------------+------------------------------------------+ +| TPE | tuners | Yes | HyperoptTuner | nni.hyperopt_tuner.hyperopt_tuner | +| Random | tuners | Yes | HyperoptTuner | nni.hyperopt_tuner.hyperopt_tuner | +| Anneal | tuners | Yes | HyperoptTuner | nni.hyperopt_tuner.hyperopt_tuner | +| Evolution | tuners | Yes | EvolutionTuner | nni.evolution_tuner.evolution_tuner | +| BatchTuner | tuners | Yes | BatchTuner | nni.batch_tuner.batch_tuner | +| GridSearch | tuners | Yes | GridSearchTuner | nni.gridsearch_tuner.gridsearch_tuner | +| NetworkMorphism | tuners | Yes | NetworkMorphismTuner | nni.networkmorphism_tuner.networkmo... | +| MetisTuner | tuners | Yes | MetisTuner | nni.metis_tuner.metis_tuner | +| GPTuner | tuners | Yes | GPTuner | nni.gp_tuner.gp_tuner | +| PBTTuner | tuners | Yes | PBTTuner | nni.pbt_tuner.pbt_tuner | +| SMAC | tuners | No | SMACTuner | nni.smac_tuner.smac_tuner | +| PPOTuner | tuners | No | PPOTuner | nni.ppo_tuner.ppo_tuner | +| Medianstop | assessors | Yes | MedianstopAssessor | nni.medianstop_assessor.medianstop_... | +| Curvefitting | assessors | Yes | CurvefittingAssessor | nni.curvefitting_assessor.curvefitt... | +| Hyperband | advisors | Yes | Hyperband | nni.hyperband_advisor.hyperband_adv...
| +| BOHB | advisors | Yes | BOHB | nni.bohb_advisor.bohb_advisor | ++-----------------+------------+-----------+----------------------+------------------------------------------+ +``` + +### Uninstall package + +Run following command to uninstall an installed package: + +`nnictl package uninstall ` + +For example: + +`nnictl package uninstall demotuner` diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/en_US/Tutorial/Nnictl.md index b0a8b33513..16b8ad734e 100644 --- a/docs/en_US/Tutorial/Nnictl.md +++ b/docs/en_US/Tutorial/Nnictl.md @@ -702,40 +702,108 @@ Debug mode will disable version check function in Trialkeeper. * __nnictl package install__ * Description - Install the packages needed in nni experiments. + Install a package (customized algorithms or nni provided algorithms) as builtin tuner/assessor/advisor. * Usage ```bash - nnictl package install [OPTIONS] + nnictl package install --name + ``` + + The available `` can be checked via `nnictl package list` command. + + or + + ```bash + nnictl package install + ``` + + Reference [Install customized algorithms](InstallCustomizedAlgos.md) to prepare the installation source. + + * Example + + > Install SMAC tuner + + ```bash + nnictl package install --name SMAC + ``` + + > Install a customized tuner + + ```bash + nnictl package install nni/examples/tuners/customized_tuner/dist/demo_tuner-0.1-py3-none-any.whl + ``` + + +* __nnictl package show__ + + * Description + + Show the detailed information of specified packages. + + * Usage + + ```bash + nnictl package show + ``` + + * Example + + ```bash + nnictl package show SMAC + ``` + +* __nnictl package list__ + * Description + + List the installed/all packages. 
+ + * Usage + + ```bash + nnictl package list [OPTIONS] ``` * Options |Name, shorthand|Required|Default|Description| |------|------|------ |------| - |--name| True| |The name of package to be installed| + |--all| False| |List all packages| * Example - > Install the packages needed in tuner SMAC + > List installed packages ```bash - nnictl package install --name=SMAC + nnictl package list ``` -* __nnictl package show__ + > List all packages + + ```bash + nnictl package list --all + ``` + +* __nnictl package uninstall__ * Description - List the packages supported. + Uninstall a package. * Usage ```bash - nnictl package show + nnictl package uninstall ``` + * Example + Uninstall SMAC package + + ```bash + nnictl package uninstall SMAC + ``` + + ![](https://placehold.it/15/1589F0/000000?text=+) `Generate search space` diff --git a/docs/en_US/_templates/index.html b/docs/en_US/_templates/index.html index da2a6f48d7..9ca37b459c 100644 --- a/docs/en_US/_templates/index.html +++ b/docs/en_US/_templates/index.html @@ -214,6 +214,7 @@

NNI capabilities in a glance

diff --git a/docs/en_US/hpo_advanced.rst b/docs/en_US/hpo_advanced.rst index 50b9b236d2..8b85d4edaf 100644 --- a/docs/en_US/hpo_advanced.rst +++ b/docs/en_US/hpo_advanced.rst @@ -8,3 +8,5 @@ Advanced Features Write a New Assessor Write a New Advisor Write a New Training Service + Install Customized Algorithms as Builtin Tuners/Assessors/Advisors + How to install customized tuner as a builtin tuner diff --git a/examples/tuners/customized_tuner/README.md b/examples/tuners/customized_tuner/README.md new file mode 100644 index 0000000000..bde0a06c2e --- /dev/null +++ b/examples/tuners/customized_tuner/README.md @@ -0,0 +1,3 @@ +# How to install this customized tuner as a builtin tuner + +Reference [this document](https://github.com/microsoft/nni/blob/master/docs/en_US/Tuner/InstallCustomizedTuner.md) to install this customized tuner as a builtin tuner. \ No newline at end of file diff --git a/examples/tuners/customized_tuner/demo_tuner/__init__.py b/examples/tuners/customized_tuner/demo_tuner/__init__.py new file mode 100644 index 0000000000..fe22174ff3 --- /dev/null +++ b/examples/tuners/customized_tuner/demo_tuner/__init__.py @@ -0,0 +1 @@ +from .demo_tuner import DemoTuner, MyClassArgsValidator diff --git a/examples/tuners/customized_tuner/demo_tuner/demo_tuner.py b/examples/tuners/customized_tuner/demo_tuner/demo_tuner.py new file mode 100644 index 0000000000..1881d83123 --- /dev/null +++ b/examples/tuners/customized_tuner/demo_tuner/demo_tuner.py @@ -0,0 +1,35 @@ +import random +import numpy as np +from nni.tuner import Tuner +from nni.utils import ClassArgsValidator + +class DemoTuner(Tuner): + def __init__(self, optimize_mode='maximize'): + # optimize_mode is used to demo how to create ClassArgsValidator + self.optimize_mode = optimize_mode + + def update_search_space(self, search_space): + self._space = search_space + + def generate_parameters(self, parameter_id, **kwargs): + params = {} + for k in self._space: + t, v = self._space[k]['_type'], 
self._space[k]['_value'] + if t == 'choice': + params[k] = random.choice(v) + elif t == 'randint': + params[k] = random.choice(range(v[0], v[1])) + elif t == 'uniform': + params[k] = np.random.uniform(v[0], v[1]) + else: + raise RuntimeError('parameter type {} is supported by DemoTuner!'.format(t)) + return params + + def receive_trial_result(self, parameter_id, parameters, value, **kwargs): + pass + +class MyClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + if 'optimize_mode' in kwargs: + assert kwargs['optimize_mode'] in ['maximize', 'minimize'], \ + 'optimize_mode {} is invalid!'.format(kwargs['optimize_mode']) diff --git a/examples/tuners/customized_tuner/setup.py b/examples/tuners/customized_tuner/setup.py new file mode 100644 index 0000000000..5a831c7688 --- /dev/null +++ b/examples/tuners/customized_tuner/setup.py @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import setuptools + +setuptools.setup( + name = 'demo-tuner', + version = '0.1', + packages = setuptools.find_packages(exclude=['*test*']), + + python_requires = '>=3.5', + classifiers = [ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: MIT License', + 'Operating System :: ', + 'NNI Package :: tuner :: demotuner :: demo_tuner.DemoTuner :: demo_tuner.MyClassArgsValidator' + ], + + author = 'Microsoft NNI Team', + author_email = 'nni@microsoft.com', + description = 'NNI control for Neural Network Intelligence project', + license = 'MIT', + url = 'https://github.com/Microsoft/nni' +) diff --git a/setup.py b/setup.py index a4be914036..6f5bdda244 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ def read(fname): 'schema', 'PythonWebHDFS', 'colorama', - 'scikit-learn>=0.20,<0.22' + 'scikit-learn>=0.20,<0.22', + 'pkginfo' ], entry_points = { diff --git a/src/nni_manager/rest_server/restValidationSchemas.ts b/src/nni_manager/rest_server/restValidationSchemas.ts index 773066bfd4..7b620c9537 100644 --- 
a/src/nni_manager/rest_server/restValidationSchemas.ts +++ b/src/nni_manager/rest_server/restValidationSchemas.ts @@ -169,7 +169,7 @@ export namespace ValidationSchemas { versionCheck: joi.boolean(), logCollection: joi.string(), advisor: joi.object({ - builtinAdvisorName: joi.string().valid('Hyperband', 'BOHB'), + builtinAdvisorName: joi.string(), codeDir: joi.string(), classFileName: joi.string(), className: joi.string(), @@ -178,7 +178,7 @@ export namespace ValidationSchemas { gpuIndices: joi.string() }), tuner: joi.object({ - builtinTunerName: joi.string().valid('TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'BatchTuner', 'GridSearch', 'NetworkMorphism', 'MetisTuner', 'GPTuner', 'PPOTuner', 'PBTTuner'), + builtinTunerName: joi.string(), codeDir: joi.string(), classFileName: joi.string(), className: joi.string(), @@ -188,7 +188,7 @@ export namespace ValidationSchemas { gpuIndices: joi.string() }), assessor: joi.object({ - builtinAssessorName: joi.string().valid('Medianstop', 'Curvefitting'), + builtinAssessorName: joi.string(), codeDir: joi.string(), classFileName: joi.string(), className: joi.string(), diff --git a/src/sdk/pynni/nni/__init__.py b/src/sdk/pynni/nni/__init__.py index c7236adc1c..f83650eee1 100644 --- a/src/sdk/pynni/nni/__init__.py +++ b/src/sdk/pynni/nni/__init__.py @@ -4,6 +4,7 @@ __version__ = '999.0.0-developing' from .env_vars import dispatcher_env_vars +from .utils import ClassArgsValidator if dispatcher_env_vars.SDK_PROCESS != 'dispatcher': from .trial import * diff --git a/src/sdk/pynni/nni/__main__.py b/src/sdk/pynni/nni/__main__.py index 2723cf6456..003a2bfe41 100644 --- a/src/sdk/pynni/nni/__main__.py +++ b/src/sdk/pynni/nni/__main__.py @@ -2,16 +2,14 @@ # Licensed under the MIT license. 
import os -import sys import argparse import logging import json -import importlib import base64 from .common import enable_multi_thread, enable_multi_phase -from .constants import ModuleName, ClassName, ClassArgs, AdvisorModuleName, AdvisorClassName from .msg_dispatcher import MsgDispatcher +from .package_utils import create_builtin_class_instance, create_customized_class_instance logger = logging.getLogger('nni.main') logger.debug('START') @@ -20,49 +18,6 @@ import coverage coverage.process_startup() -def augment_classargs(input_class_args, classname): - if classname in ClassArgs: - for key, value in ClassArgs[classname].items(): - if key not in input_class_args: - input_class_args[key] = value - return input_class_args - - -def create_builtin_class_instance(class_name, class_args, builtin_module_dict, builtin_class_dict): - if class_name not in builtin_module_dict or \ - importlib.util.find_spec(builtin_module_dict[class_name]) is None: - raise RuntimeError('Builtin module is not found: {}'.format(class_name)) - class_module = importlib.import_module(builtin_module_dict[class_name]) - class_constructor = getattr(class_module, builtin_class_dict[class_name]) - - if class_args is None: - class_args = {} - class_args = augment_classargs(class_args, class_name) - instance = class_constructor(**class_args) - - return instance - - -def create_customized_class_instance(class_params): - code_dir = class_params.get('codeDir') - class_filename = class_params.get('classFileName') - class_name = class_params.get('className') - class_args = class_params.get('classArgs') - - if not os.path.isfile(os.path.join(code_dir, class_filename)): - raise ValueError('Class file not found: {}'.format( - os.path.join(code_dir, class_filename))) - sys.path.append(code_dir) - module_name = os.path.splitext(class_filename)[0] - class_module = importlib.import_module(module_name) - class_constructor = getattr(class_module, class_name) - - if class_args is None: - class_args = {} - instance = 
class_constructor(**class_args) - - return instance - def main(): parser = argparse.ArgumentParser(description='Dispatcher command line parser') @@ -106,11 +61,11 @@ def main(): def _run_advisor(exp_params): - if exp_params.get('advisor').get('builtinAdvisorName') in AdvisorModuleName: + if exp_params.get('advisor').get('builtinAdvisorName'): dispatcher = create_builtin_class_instance( exp_params.get('advisor').get('builtinAdvisorName'), exp_params.get('advisor').get('classArgs'), - AdvisorModuleName, AdvisorClassName) + 'advisors') else: dispatcher = create_customized_class_instance(exp_params.get('advisor')) if dispatcher is None: @@ -123,11 +78,11 @@ def _run_advisor(exp_params): def _create_tuner(exp_params): - if exp_params.get('tuner').get('builtinTunerName') in ModuleName: + if exp_params.get('tuner').get('builtinTunerName'): tuner = create_builtin_class_instance( exp_params.get('tuner').get('builtinTunerName'), exp_params.get('tuner').get('classArgs'), - ModuleName, ClassName) + 'tuners') else: tuner = create_customized_class_instance(exp_params.get('tuner')) if tuner is None: @@ -136,11 +91,11 @@ def _create_tuner(exp_params): def _create_assessor(exp_params): - if exp_params.get('assessor').get('builtinAssessorName') in ModuleName: + if exp_params.get('assessor').get('builtinAssessorName'): assessor = create_builtin_class_instance( exp_params.get('assessor').get('builtinAssessorName'), exp_params.get('assessor').get('classArgs'), - ModuleName, ClassName) + 'assessors') else: assessor = create_customized_class_instance(exp_params.get('assessor')) if assessor is None: diff --git a/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py b/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py index fd1c407c6e..40a78ada69 100644 --- a/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py +++ b/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py @@ -9,10 +9,11 @@ import math import logging import json_tricks - +from schema import Schema, Optional import ConfigSpace as CS import 
ConfigSpace.hyperparameters as CSH +from nni import ClassArgsValidator from nni.protocol import CommandType, send from nni.msg_dispatcher_base import MsgDispatcherBase from nni.utils import OptimizeMode, MetricType, extract_scalar_reward @@ -230,6 +231,20 @@ def _record_hyper_configs(self, hyper_configs): self.num_configs_to_run.append(len(hyper_configs)) self.increase_i() +class BOHBClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('min_budget'): self.range('min_budget', int, 0, 9999), + Optional('max_budget'): self.range('max_budget', int, 0, 9999), + Optional('eta'): self.range('eta', int, 0, 9999), + Optional('min_points_in_model'): self.range('min_points_in_model', int, 0, 9999), + Optional('top_n_percent'): self.range('top_n_percent', int, 1, 99), + Optional('num_samples'): self.range('num_samples', int, 1, 9999), + Optional('random_fraction'): self.range('random_fraction', float, 0, 9999), + Optional('bandwidth_factor'): self.range('bandwidth_factor', float, 0, 9999), + Optional('min_bandwidth'): self.range('min_bandwidth', float, 0, 9999), + }).validate(kwargs) class BOHB(MsgDispatcherBase): """ diff --git a/src/sdk/pynni/nni/constants.py b/src/sdk/pynni/nni/constants.py index 211d320b89..412577571a 100644 --- a/src/sdk/pynni/nni/constants.py +++ b/src/sdk/pynni/nni/constants.py @@ -1,60 +1,86 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
- -ModuleName = { - 'TPE': 'nni.hyperopt_tuner.hyperopt_tuner', - 'Random': 'nni.hyperopt_tuner.hyperopt_tuner', - 'Anneal': 'nni.hyperopt_tuner.hyperopt_tuner', - 'Evolution': 'nni.evolution_tuner.evolution_tuner', - 'SMAC': 'nni.smac_tuner.smac_tuner', - 'BatchTuner': 'nni.batch_tuner.batch_tuner', - 'Medianstop': 'nni.medianstop_assessor.medianstop_assessor', - 'GridSearch': 'nni.gridsearch_tuner.gridsearch_tuner', - 'NetworkMorphism': 'nni.networkmorphism_tuner.networkmorphism_tuner', - 'Curvefitting': 'nni.curvefitting_assessor.curvefitting_assessor', - 'MetisTuner': 'nni.metis_tuner.metis_tuner', - 'GPTuner': 'nni.gp_tuner.gp_tuner', - 'PPOTuner': 'nni.ppo_tuner.ppo_tuner', - 'PBTTuner': 'nni.pbt_tuner.pbt_tuner' -} - -ClassName = { - 'TPE': 'HyperoptTuner', - 'Random': 'HyperoptTuner', - 'Anneal': 'HyperoptTuner', - 'Evolution': 'EvolutionTuner', - 'SMAC': 'SMACTuner', - 'BatchTuner': 'BatchTuner', - 'GridSearch': 'GridSearchTuner', - 'NetworkMorphism':'NetworkMorphismTuner', - 'MetisTuner':'MetisTuner', - 'GPTuner':'GPTuner', - 'PPOTuner': 'PPOTuner', - 'PBTTuner': 'PBTTuner', - - 'Medianstop': 'MedianstopAssessor', - 'Curvefitting': 'CurvefittingAssessor' -} - -ClassArgs = { - 'TPE': { - 'algorithm_name': 'tpe' - }, - 'Random': { - 'algorithm_name': 'random_search' - }, - 'Anneal': { - 'algorithm_name': 'anneal' - } -} - -AdvisorModuleName = { - 'Hyperband': 'nni.hyperband_advisor.hyperband_advisor', - 'BOHB': 'nni.bohb_advisor.bohb_advisor' -} - -AdvisorClassName = { - 'Hyperband': 'Hyperband', - 'BOHB': 'BOHB' +BuiltinAlgorithms = { + 'tuners': [ + { + 'name': 'TPE', + 'class_name': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner', + 'class_args': { + 'algorithm_name': 'tpe' + }, + 'class_args_validator': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptClassArgsValidator' + }, + { + 'name': 'Random', + 'class_name': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner', + 'class_args': { + 'algorithm_name': 'random_search' + }, + 'accept_class_args': False, + 
'class_args_validator': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptClassArgsValidator' + }, + { + 'name': 'Anneal', + 'class_name': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner', + 'class_args': { + 'algorithm_name': 'anneal' + }, + 'class_args_validator': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptClassArgsValidator' + }, + { + 'name': 'Evolution', + 'class_name': 'nni.evolution_tuner.evolution_tuner.EvolutionTuner', + 'class_args_validator': 'nni.evolution_tuner.evolution_tuner.EvolutionClassArgsValidator' + }, + { + 'name': 'BatchTuner', + 'class_name': 'nni.batch_tuner.batch_tuner.BatchTuner', + 'accept_class_args': False, + }, + { + 'name': 'GridSearch', + 'class_name': 'nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner', + 'accept_class_args': False, + }, + { + 'name': 'NetworkMorphism', + 'class_name': 'nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner', + 'class_args_validator': 'nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismClassArgsValidator' + }, + { + 'name': 'MetisTuner', + 'class_name': 'nni.metis_tuner.metis_tuner.MetisTuner', + 'class_args_validator': 'nni.metis_tuner.metis_tuner.MetisClassArgsValidator' + }, + { + 'name': 'GPTuner', + 'class_name': 'nni.gp_tuner.gp_tuner.GPTuner', + 'class_args_validator': 'nni.gp_tuner.gp_tuner.GPClassArgsValidator' + }, + { + 'name': 'PBTTuner', + 'class_name': 'nni.pbt_tuner.pbt_tuner.PBTTuner', + 'class_args_validator': 'nni.pbt_tuner.pbt_tuner.PBTClassArgsValidator' + } + ], + 'assessors': [ + { + 'name': 'Medianstop', + 'class_name': 'nni.medianstop_assessor.medianstop_assessor.MedianstopAssessor', + 'class_args_validator': 'nni.medianstop_assessor.medianstop_assessor.MedianstopClassArgsValidator' + }, + { + 'name': 'Curvefitting', + 'class_name': 'nni.curvefitting_assessor.curvefitting_assessor.CurvefittingAssessor', + 'class_args_validator': 'nni.curvefitting_assessor.curvefitting_assessor.CurvefittingClassArgsValidator' + }, + ], + 'advisors': [ + { + 'name': 
'Hyperband', + 'class_name': 'nni.hyperband_advisor.hyperband_advisor.Hyperband', + 'class_args_validator': 'nni.hyperband_advisor.hyperband_advisor.HyperbandClassArgsValidator' + } + ] } diff --git a/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py b/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py index b05814c4d3..885886e89b 100644 --- a/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py +++ b/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py @@ -3,12 +3,23 @@ import logging import datetime +from schema import Schema, Optional + +from nni import ClassArgsValidator from nni.assessor import Assessor, AssessResult from nni.utils import extract_scalar_history from .model_factory import CurveModel logger = logging.getLogger('curvefitting_Assessor') +class CurvefittingClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'epoch_num': self.range('epoch_num', int, 0, 9999), + Optional('start_step'): self.range('start_step', int, 0, 9999), + Optional('threshold'): self.range('threshold', float, 0, 9999), + Optional('gap'): self.range('gap', int, 1, 9999), + }).validate(kwargs) class CurvefittingAssessor(Assessor): """CurvefittingAssessor uses learning curve fitting algorithm to predict the learning curve performance in the future. 
diff --git a/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py b/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py index 120d2b5dc1..c00a4ed900 100644 --- a/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py +++ b/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py @@ -9,6 +9,9 @@ import random import numpy as np +from schema import Schema, Optional + +from nni import ClassArgsValidator from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward, split_index, json2parameter, json2space @@ -65,6 +68,12 @@ def mutation(self, config=None, info=None, save_dir=None): self.save_dir = save_dir self.info = info +class EvolutionClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('population_size'): self.range('population_size', int, 0, 99999), + }).validate(kwargs) class EvolutionTuner(Tuner): """ diff --git a/src/sdk/pynni/nni/gp_tuner/gp_tuner.py b/src/sdk/pynni/nni/gp_tuner/gp_tuner.py index d676fef720..c4e6e9a89c 100644 --- a/src/sdk/pynni/nni/gp_tuner/gp_tuner.py +++ b/src/sdk/pynni/nni/gp_tuner/gp_tuner.py @@ -10,10 +10,12 @@ import warnings import logging import numpy as np +from schema import Schema, Optional from sklearn.gaussian_process.kernels import Matern from sklearn.gaussian_process import GaussianProcessRegressor +from nni import ClassArgsValidator from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward @@ -22,6 +24,19 @@ logger = logging.getLogger("GP_Tuner_AutoML") +class GPClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('utility'): self.choices('utility', 'ei', 'ucb', 'poi'), + Optional('kappa'): float, + Optional('xi'): float, + Optional('nu'): float, + Optional('alpha'): float, + Optional('cold_start_num'): int, + 
Optional('selection_num_warm_up'): int, + Optional('selection_num_starting_points'): int, + }).validate(kwargs) class GPTuner(Tuner): """ diff --git a/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py b/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py index 69bf3bf514..2b13d8f3c3 100644 --- a/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py +++ b/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py @@ -12,6 +12,9 @@ import json_tricks import numpy as np +from schema import Schema, Optional + +from nni import ClassArgsValidator from nni.common import multi_phase_enabled from nni.msg_dispatcher_base import MsgDispatcherBase from nni.protocol import CommandType, send @@ -249,6 +252,13 @@ def _record_hyper_configs(self, hyper_configs): self.num_configs_to_run.append(len(hyper_configs)) self.increase_i() +class HyperbandClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('R'): int, + Optional('eta'): int + }).validate(kwargs) class Hyperband(MsgDispatcherBase): """Hyperband inherit from MsgDispatcherBase rather than Tuner, because it integrates both tuner's functions and assessor's functions. 
diff --git a/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py b/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py index c7e168191f..fbe9cda13f 100644 --- a/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py +++ b/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py @@ -10,6 +10,8 @@ import hyperopt as hp import numpy as np +from schema import Optional, Schema +from nni import ClassArgsValidator from nni.tuner import Tuner from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index @@ -178,6 +180,13 @@ def _add_index(in_x, parameter): return parameter return None # note: this is not written by original author, feel free to modify if you think it's incorrect +class HyperoptClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('parallel_optimize'): bool, + Optional('constant_liar_type'): self.choices('constant_liar_type', 'min', 'max', 'mean') + }).validate(kwargs) class HyperoptTuner(Tuner): """ diff --git a/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py b/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py index 89a1215471..56eb82a3c9 100644 --- a/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py +++ b/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py @@ -2,11 +2,21 @@ # Licensed under the MIT license. 
import logging +from schema import Schema, Optional + +from nni import ClassArgsValidator from nni.assessor import Assessor, AssessResult from nni.utils import extract_scalar_history logger = logging.getLogger('medianstop_Assessor') +class MedianstopClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('start_step'): self.range('start_step', int, 0, 9999), + }).validate(kwargs) + class MedianstopAssessor(Assessor): """MedianstopAssessor is The median stopping rule stops a pending trial X at step S if the trial’s best objective value by step S is strictly worse than the median value diff --git a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py index 6efc4d211c..cdd50dbcf3 100644 --- a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py +++ b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py @@ -12,7 +12,9 @@ import warnings from multiprocessing.dummy import Pool as ThreadPool import numpy as np +from schema import Schema, Optional +from nni import ClassArgsValidator import nni.metis_tuner.lib_constraint_summation as lib_constraint_summation import nni.metis_tuner.lib_data as lib_data import nni.metis_tuner.Regression_GMM.CreateModel as gmm_create_model @@ -31,6 +33,15 @@ CONSTRAINT_UPPERBOUND = None CONSTRAINT_PARAMS_IDX = [] +class MetisClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('no_resampling'): bool, + Optional('no_candidates'): bool, + Optional('selection_num_starting_points'): int, + Optional('cold_start_num'): int, + }).validate(kwargs) class MetisTuner(Tuner): """ diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py b/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py index 5ea48ef4f9..50706756dd 100644 --- 
a/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py @@ -7,17 +7,26 @@ import logging import os - +from schema import Optional, Schema from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward from nni.networkmorphism_tuner.bayesian import BayesianOptimizer from nni.networkmorphism_tuner.nn import CnnGenerator, MlpGenerator from nni.networkmorphism_tuner.utils import Constant - from nni.networkmorphism_tuner.graph import graph_to_json, json_to_graph +from nni import ClassArgsValidator logger = logging.getLogger("NetworkMorphism_AutoML") +class NetworkMorphismClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + Optional('optimize_mode'): self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('task'): self.choices('task', 'cv', 'nlp', 'common'), + Optional('input_width'): int, + Optional('input_channel'): int, + Optional('n_output_node'): int + }).validate(kwargs) class NetworkMorphismTuner(Tuner): """ diff --git a/src/sdk/pynni/nni/package_utils.py b/src/sdk/pynni/nni/package_utils.py new file mode 100644 index 0000000000..67fa961551 --- /dev/null +++ b/src/sdk/pynni/nni/package_utils.py @@ -0,0 +1,373 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import os +import site +import sys +from collections import defaultdict +from pathlib import Path +import importlib +import ruamel.yaml as yaml + +from .constants import BuiltinAlgorithms + +ALGO_TYPES = ['tuners', 'assessors', 'advisors'] + +def get_all_builtin_names(algo_type): + """Get all valid builtin names, including: + 1. BuiltinAlgorithms which is pre-installed. + 2. 
User installed packages in /config/installed_packages.yml + + Parameters + ---------- + algo_type: str + can be one of 'tuners', 'assessors' or 'advisors' + + Returns: list of string + ------- + All builtin names of specified type, for example, if algo_type is 'tuners', returns + all builtin tuner names. + """ + assert algo_type in ALGO_TYPES + merged_dict = _get_merged_builtin_dict() + + builtin_names = [x['name'] for x in merged_dict[algo_type]] + return builtin_names + +def get_not_installable_builtin_names(algo_type=None): + """Get builtin names in BuiltinAlgorithms which do not need to be installed + and can be used once NNI is installed. + + Parameters + ---------- + algo_type: str | None + can be one of 'tuners', 'assessors', 'advisors' or None + + Returns: list of string + ------- + All builtin names of specified type, for example, if algo_type is 'tuners', returns + all builtin tuner names. + If algo_type is None, returns all builtin names of all types. + """ + if algo_type is None: + meta = BuiltinAlgorithms + else: + assert algo_type in ALGO_TYPES + meta = { + algo_type: BuiltinAlgorithms[algo_type] + } + names = [] + for t in ALGO_TYPES: + if t in meta: + names.extend([x['name'] for x in meta[t]]) + return names + +def get_builtin_algo_meta(algo_type=None, builtin_name=None): + """ Get meta information of builtin algorithms from: + 1. Pre-installed BuiltinAlgorithms + 2. User installed packages in /config/installed_packages.yml + + Parameters + ---------- + algo_type: str | None + can be one of 'tuners', 'assessors', 'advisors' or None + builtin_name: str | None + builtin name. 
+ + Returns: dict | list of dict | None + ------- + If builtin_name is specified, returns meta information of specified builtin + algorithms, for example: + { + 'name': 'Random', + 'class_name': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner', + 'class_args': { + 'algorithm_name': 'random_search' + }, + 'accept_class_args': False, + 'class_args_validator': 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptClassArgsValidator' + } + If builtin_name is None, returns multiple meta information in a list. + """ + merged_dict = _get_merged_builtin_dict() + + if algo_type is None and builtin_name is None: + return merged_dict + + if algo_type: + assert algo_type in ALGO_TYPES + metas = merged_dict[algo_type] + else: + metas = merged_dict['tuners'] + merged_dict['assessors'] + merged_dict['advisors'] + if builtin_name: + for m in metas: + if m['name'] == builtin_name: + return m + else: + return metas + + return None + +def get_installed_package_meta(algo_type, builtin_name): + """ Get meta information of user installed algorithms from: + /config/installed_packages.yml + + Parameters + ---------- + algo_type: str | None + can be one of 'tuners', 'assessors', 'advisors' or None + builtin_name: str + builtin name. 
+ + Returns: dict | None + ------- + Returns meta information of specified builtin algorithms, for example: + { + 'class_args_validator': 'nni.smac_tuner.smac_tuner.SMACClassArgsValidator', + 'class_name': 'nni.smac_tuner.smac_tuner.SMACTuner', + 'name': 'SMAC' + } + """ + assert builtin_name is not None + if algo_type: + assert algo_type in ALGO_TYPES + config = read_installed_package_meta() + + candidates = [] + if algo_type: + candidates = config[algo_type] + else: + for algo_type in ALGO_TYPES: + candidates.extend(config[algo_type]) + for meta in candidates: + if meta['name'] == builtin_name: + return meta + return None + +def _parse_full_class_name(full_class_name): + if not full_class_name: + return None, None + parts = full_class_name.split('.') + module_name, class_name = '.'.join(parts[:-1]), parts[-1] + return module_name, class_name + +def get_builtin_module_class_name(algo_type, builtin_name): + """Get module name and class name of all builtin algorithms + + Parameters + ---------- + algo_type: str + can be one of 'tuners', 'assessors', 'advisors' + builtin_name: str + builtin name. + + Returns: tuple + ------- + tuple of (module name, class name) + """ + assert algo_type in ALGO_TYPES + assert builtin_name is not None + meta = get_builtin_algo_meta(algo_type, builtin_name) + if not meta: + return None, None + return _parse_full_class_name(meta['class_name']) + +def create_validator_instance(algo_type, builtin_name): + """Create instance of validator class + + Parameters + ---------- + algo_type: str + can be one of 'tuners', 'assessors', 'advisors' + builtin_name: str + builtin name. + + Returns: object | None + ------- + Returns validator class instance. + If specified validator class does not exist, returns None. 
+ """ + assert algo_type in ALGO_TYPES + assert builtin_name is not None + meta = get_builtin_algo_meta(algo_type, builtin_name) + if not meta or 'class_args_validator' not in meta: + return None + module_name, class_name = _parse_full_class_name(meta['class_args_validator']) + class_module = importlib.import_module(module_name) + class_constructor = getattr(class_module, class_name) + + return class_constructor() + +def create_builtin_class_instance(builtin_name, input_class_args, algo_type): + """Create instance of builtin algorithms + + Parameters + ---------- + builtin_name: str + builtin name. + input_class_args: dict + kwargs for builtin class constructor + algo_type: str + can be one of 'tuners', 'assessors', 'advisors' + + Returns: object + ------- + Returns builtin class instance. + """ + assert algo_type in ALGO_TYPES + if builtin_name not in get_all_builtin_names(algo_type): + raise RuntimeError('Builtin name is not found: {}'.format(builtin_name)) + + def parse_algo_meta(algo_meta, input_class_args): + """ + 1. parse class_name field in meta data into module name and class name, + for example: + parse class_name 'nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner' in meta data into: + module name: nni.hyperopt_tuner.hyperopt_tuner + class name: HyperoptTuner + 2. merge user specified class args together with builtin class args. 
+ """ + assert algo_meta + module_name, class_name = _parse_full_class_name(algo_meta['class_name']) + + class_args = {} + if 'class_args' in algo_meta: + class_args = algo_meta['class_args'] + if input_class_args is not None: + class_args.update(input_class_args) + + return module_name, class_name, class_args + + algo_meta = get_builtin_algo_meta(algo_type, builtin_name) + module_name, class_name, class_args = parse_algo_meta(algo_meta, input_class_args) + + if importlib.util.find_spec(module_name) is None: + raise RuntimeError('Builtin module can not be loaded: {}'.format(module_name)) + + class_module = importlib.import_module(module_name) + class_constructor = getattr(class_module, class_name) + + instance = class_constructor(**class_args) + + return instance + +def create_customized_class_instance(class_params): + """Create instance of customized algorithms + + Parameters + ---------- + class_params: dict + class_params should contains following keys: + codeDir: code directory + classFileName: python file name of the class + className: class name + classArgs (optional): kwargs pass to class constructor + Returns: object + ------- + Returns customized class instance. 
+ """ + + code_dir = class_params.get('codeDir') + class_filename = class_params.get('classFileName') + class_name = class_params.get('className') + class_args = class_params.get('classArgs') + + if not os.path.isfile(os.path.join(code_dir, class_filename)): + raise ValueError('Class file not found: {}'.format( + os.path.join(code_dir, class_filename))) + sys.path.append(code_dir) + module_name = os.path.splitext(class_filename)[0] + class_module = importlib.import_module(module_name) + class_constructor = getattr(class_module, class_name) + + if class_args is None: + class_args = {} + instance = class_constructor(**class_args) + + return instance + +def get_python_dir(sitepackages_path): + if sys.platform == "win32": + return str(Path(sitepackages_path)) + else: + return str(Path(sitepackages_path).parents[2]) + +def get_nni_installation_parent_dir(): + ''' Find nni installation parent directory + ''' + def try_installation_path_sequentially(*sitepackages): + '''Try different installation path sequentially until nni is found. 
+ Return None if nothing is found + ''' + def _generate_installation_path(sitepackages_path): + python_dir = get_python_dir(sitepackages_path) + entry_file = os.path.join(python_dir, 'nni', 'main.js') + if os.path.isfile(entry_file): + return python_dir + return None + + for sitepackage in sitepackages: + python_dir = _generate_installation_path(sitepackage) + if python_dir: + return python_dir + return None + + if os.getenv('VIRTUAL_ENV'): + # if 'virtualenv' package is used, `site` has not attr getsitepackages, so we will instead use VIRTUAL_ENV + # Note that conda venv will not have VIRTUAL_ENV + python_dir = os.getenv('VIRTUAL_ENV') + else: + python_sitepackage = site.getsitepackages()[0] + # If system-wide python is used, we will give priority to using `local sitepackage`--"usersitepackages()" given + # that nni exists there + if python_sitepackage.startswith('/usr') or python_sitepackage.startswith('/Library'): + python_dir = try_installation_path_sequentially(site.getusersitepackages(), site.getsitepackages()[0]) + else: + python_dir = try_installation_path_sequentially(site.getsitepackages()[0], site.getusersitepackages()) + + return python_dir + +def get_nni_installation_path(): + ''' Find nni installation directory + ''' + parent_dir = get_nni_installation_parent_dir() + if parent_dir: + entry_file = os.path.join(parent_dir, 'nni', 'main.js') + if os.path.isfile(entry_file): + return os.path.join(parent_dir, 'nni') + return None + +def get_nni_config_dir(): + return os.path.join(get_nni_installation_path(), 'config') + +def get_package_config_path(): + config_dir = get_nni_config_dir() + if not os.path.exists(config_dir): + os.makedirs(config_dir, exist_ok=True) + return os.path.join(config_dir, 'installed_packages.yml') + +def read_installed_package_meta(): + config_file = get_package_config_path() + if os.path.exists(config_file): + with open(config_file, 'r') as f: + config = yaml.load(f, Loader=yaml.Loader) + else: + config = defaultdict(list) + for t 
in ALGO_TYPES: + if t not in config: + config[t] = [] + return config + +def write_package_meta(config): + config_file = get_package_config_path() + with open(config_file, 'w') as f: + f.write(yaml.dump(dict(config), default_flow_style=False)) + +def _get_merged_builtin_dict(): + def merge_meta_dict(d1, d2): + res = defaultdict(list) + for t in ALGO_TYPES: + res[t] = d1[t] + d2[t] + return res + + return merge_meta_dict(BuiltinAlgorithms, read_installed_package_meta()) diff --git a/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py b/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py index f1a0189aad..507c519a2a 100755 --- a/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py +++ b/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py @@ -6,8 +6,10 @@ import os import random import numpy as np +from schema import Schema, Optional import nni +from nni import ClassArgsValidator import nni.parameter_expressions from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward, split_index, json2parameter, json2space @@ -157,6 +159,15 @@ def __init__(self, checkpoint_dir=None, hyper_parameters=None, parameter_id=None def clean_id(self): self.parameter_id = None +class PBTClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('all_checkpoint_dir'): str, + Optional('population_size'): self.range('population_size', int, 0, 99999), + Optional('factors'): float, + Optional('fraction'): float, + }).validate(kwargs) class PBTTuner(Tuner): def __init__(self, optimize_mode="maximize", all_checkpoint_dir=None, population_size=10, factor=0.2, diff --git a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py index 80166370f7..33b62d600e 100644 --- a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py +++ b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py @@ -10,8 +10,10 @@ class PPOTuner import logging import numpy as np from gym import spaces +from schema import Schema, 
Optional import nni +from nni import ClassArgsValidator from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward @@ -285,6 +287,21 @@ def train(self, trials_info, nenvs): mbstates = states[mbenvinds] self.model.train(lrnow, cliprangenow, *slices, mbstates) +class PPOClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('trials_per_update'): self.range('trials_per_update', int, 0, 99999), + Optional('epochs_per_update'): self.range('epochs_per_update', int, 0, 99999), + Optional('minibatch_size'): self.range('minibatch_size', int, 0, 99999), + Optional('ent_coef'): float, + Optional('lr'): float, + Optional('vf_coef'): float, + Optional('max_grad_norm'): float, + Optional('gamma'): float, + Optional('lam'): float, + Optional('cliprange'): float, + }).validate(kwargs) class PPOTuner(Tuner): """ diff --git a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py index 9683a2b60b..fdaed49d8f 100644 --- a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py +++ b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py @@ -9,6 +9,7 @@ import sys import numpy as np +from schema import Schema, Optional from smac.facade.epils_facade import EPILS from smac.facade.roar_facade import ROAR @@ -19,6 +20,7 @@ from ConfigSpaceNNI import Configuration import nni +from nni import ClassArgsValidator from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward @@ -26,6 +28,13 @@ logger = logging.getLogger('smac_AutoML') +class SMACClassArgsValidator(ClassArgsValidator): + def validate_class_args(self, **kwargs): + Schema({ + 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), + Optional('config_dedup'): bool + }).validate(kwargs) + class SMACTuner(Tuner): """ This is a wrapper of [SMAC](https://github.com/automl/SMAC3) following NNI tuner interface. 
diff --git a/src/sdk/pynni/nni/utils.py b/src/sdk/pynni/nni/utils.py index 37a3b6e5a9..0346aec9fc 100644 --- a/src/sdk/pynni/nni/utils.py +++ b/src/sdk/pynni/nni/utils.py @@ -6,6 +6,7 @@ import functools from enum import Enum, unique import json_tricks +from schema import And from . import parameter_expressions from .common import init_logger @@ -217,7 +218,6 @@ def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeTyp y = copy.deepcopy(x) return y - def merge_parameter(base_params, override_params): """ Update the parameters in ``base_params`` with ``override_params``. @@ -256,3 +256,64 @@ def merge_parameter(base_params, override_params): (k, type(getattr(base_params, k)), type(v))) setattr(base_params, k, v) return base_params + +class ClassArgsValidator(object): + """ + NNI tuners/assessors/advisors accept a `classArgs` parameter in experiment configuration file. + This ClassArgsValidator interface is used to validate the classArgs section in experiment + configuration file. + """ + def validate_class_args(self, **kwargs): + """ + Validate the classArgs configuration in experiment configuration file. + + Parameters + ---------- + kwargs: dict + kwargs passed to tuner/assessor/advisor constructor + + Raises: + Raise an exception if the kwargs is invalid. + """ + pass + + def choices(self, key, *args): + """ + Utility method to create a scheme to check whether the `key` is one of the `args`. + + Parameters: + ---------- + key: str + key name of the data to be validated + args: list of str + list of the choices + + Returns: Schema + -------- + A scheme to check whether the `key` is one of the `args`. + """ + return And(lambda n: n in args, error='%s should be in [%s]!' % (key, str(args))) + + def range(self, key, keyType, start, end): + """ + Utility method to create a schema to check whether the `key` is in the range of [start, end]. 
+ + Parameters: + ---------- + key: str + key name of the data to be validated + keyType: type + python data type, such as int, float + start: type is specified by keyType + start of the range + end: type is specified by keyType + end of the range + + Returns: Schema + -------- + A scheme to check whether the `key` is in the range of [start, end]. + """ + return And( + And(keyType, error='%s should be %s type!' % (key, keyType.__name__)), + And(lambda n: start <= n <= end, error='%s should be in range of (%s, %s)!' % (key, start, end)) + ) diff --git a/test/scripts/unittest.sh b/test/scripts/unittest.sh index 359f8a31f3..0c03b2ab77 100644 --- a/test/scripts/unittest.sh +++ b/test/scripts/unittest.sh @@ -25,3 +25,9 @@ cd ${CWD}/../src/nni_manager echo "" echo "===========================Testing: nni_manager===========================" npm run test + +## ------Run nnictl unit test------ +echo "" +echo "===========================Testing: nnictl===========================" +cd ${CWD}/../tools/nni_cmd/ +python3 -m unittest discover -v tests diff --git a/tools/nni_cmd/command_utils.py b/tools/nni_cmd/command_utils.py index be2279b443..2bbcc883d1 100644 --- a/tools/nni_cmd/command_utils.py +++ b/tools/nni_cmd/command_utils.py @@ -62,7 +62,7 @@ def install_requirements_command(requirements_path): requirements_path: str Path to the directory that contains `requirements.txt`. 
""" - call(_get_pip_install() + ["-r", os.path.join(requirements_path, "requirements.txt")], shell=False) + return call(_get_pip_install() + ["-r", requirements_path], shell=False) def _get_pip_install(): @@ -72,3 +72,11 @@ def _get_pip_install(): (sys.platform != "win32" and os.getuid() != 0): # on unix and not running in root ret.append("--user") # not in virtualenv or conda return ret + +def call_pip_install(source): + return call(_get_pip_install() + [source]) + +def call_pip_uninstall(module_name): + python = "python" if sys.platform == "win32" else "python3" + cmd = [python, "-m", "pip", "uninstall", module_name] + return call(cmd) diff --git a/tools/nni_cmd/common_utils.py b/tools/nni_cmd/common_utils.py index 56dfc7af29..4166bf034c 100644 --- a/tools/nni_cmd/common_utils.py +++ b/tools/nni_cmd/common_utils.py @@ -2,14 +2,14 @@ # Licensed under the MIT license. import os -import site import sys import json import socket -from pathlib import Path import ruamel.yaml as yaml import psutil -from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT +from colorama import Fore + +from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO def get_yml_content(file_path): '''Load yaml file content''' @@ -34,17 +34,22 @@ def get_json_content(file_path): print_error(err) return None -def print_error(content): + +def print_error(*content): '''Print error information to screen''' - print(COLOR_RED_FORMAT % (ERROR_INFO % content)) + print(Fore.RED + ERROR_INFO + ' '.join([str(c) for c in content]) + Fore.RESET) + +def print_green(*content): + '''Print information to screen in green''' + print(Fore.GREEN + ' '.join([str(c) for c in content]) + Fore.RESET) -def print_normal(content): +def print_normal(*content): '''Print error information to screen''' - print(NORMAL_INFO % content) + print(NORMAL_INFO, *content) -def print_warning(content): +def print_warning(*content): '''Print warning information to screen''' - 
print(COLOR_YELLOW_FORMAT % (WARNING_INFO % content)) + print(Fore.YELLOW + WARNING_INFO + ' '.join([str(c) for c in content]) + Fore.RESET) def detect_process(pid): '''Detect if a process is alive''' @@ -70,12 +75,6 @@ def get_user(): else: return os.environ['USER'] -def get_python_dir(sitepackages_path): - if sys.platform == "win32": - return str(Path(sitepackages_path)) - else: - return str(Path(sitepackages_path).parents[2]) - def check_tensorboard_version(): try: import tensorboard @@ -84,43 +83,3 @@ def check_tensorboard_version(): print_error('import tensorboard error!') exit(1) -def get_nni_installation_path(): - ''' Find nni lib from the following locations in order - Return nni root directory if it exists - ''' - def try_installation_path_sequentially(*sitepackages): - '''Try different installation path sequentially util nni is found. - Return None if nothing is found - ''' - def _generate_installation_path(sitepackages_path): - python_dir = get_python_dir(sitepackages_path) - entry_file = os.path.join(python_dir, 'nni', 'main.js') - if os.path.isfile(entry_file): - return python_dir - return None - - for sitepackage in sitepackages: - python_dir = _generate_installation_path(sitepackage) - if python_dir: - return python_dir - return None - - if os.getenv('VIRTUAL_ENV'): - # if 'virtualenv' package is used, `site` has not attr getsitepackages, so we will instead use VIRTUAL_ENV - # Note that conda venv will not have VIRTUAL_ENV - python_dir = os.getenv('VIRTUAL_ENV') - else: - python_sitepackage = site.getsitepackages()[0] - # If system-wide python is used, we will give priority to using `local sitepackage`--"usersitepackages()" given - # that nni exists there - if python_sitepackage.startswith('/usr') or python_sitepackage.startswith('/Library'): - python_dir = try_installation_path_sequentially(site.getusersitepackages(), site.getsitepackages()[0]) - else: - python_dir = try_installation_path_sequentially(site.getsitepackages()[0], 
site.getusersitepackages()) - - if python_dir: - entry_file = os.path.join(python_dir, 'nni', 'main.js') - if os.path.isfile(entry_file): - return os.path.join(python_dir, 'nni') - print_error('Fail to find nni under python library') - exit(1) \ No newline at end of file diff --git a/tools/nni_cmd/config_schema.py b/tools/nni_cmd/config_schema.py index 9aa516beef..27f6e7af13 100644 --- a/tools/nni_cmd/config_schema.py +++ b/tools/nni_cmd/config_schema.py @@ -2,8 +2,12 @@ # Licensed under the MIT license. import os -from schema import Schema, And, Optional, Regex, Or +import json +import netifaces +from schema import Schema, And, Optional, Regex, Or, SchemaError +from nni.package_utils import create_validator_instance, get_all_builtin_names, get_builtin_algo_meta from .constants import SCHEMA_TYPE_ERROR, SCHEMA_RANGE_ERROR, SCHEMA_PATH_ERROR +from .common_utils import get_yml_content, print_warning def setType(key, valueType): @@ -25,6 +29,85 @@ def setPathCheck(key): '''check if path exist''' return And(os.path.exists, error=SCHEMA_PATH_ERROR % key) +class AlgoSchema: + """ + This class is the schema of 'tuner', 'assessor' and 'advisor' sections of experiment configuration file. + For example: + AlgoSchema('tuner') creates the schema of tuner section. + """ + def __init__(self, algo_type): + """ + Parameters: + ----------- + algo_type: str + One of ['tuner', 'assessor', 'advisor']. + 'tuner': This AlgoSchema class create the schema of tuner section. + 'assessor': This AlgoSchema class create the schema of assessor section. + 'advisor': This AlgoSchema class create the schema of advisor section. 
+ """ + assert algo_type in ['tuner', 'assessor', 'advisor'] + self.algo_type = algo_type + self.algo_schema = { + Optional('codeDir'): setPathCheck('codeDir'), + Optional('classFileName'): setType('classFileName', str), + Optional('className'): setType('className', str), + Optional('classArgs'): dict, + Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), + Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), + } + self.builtin_keys = { + 'tuner': 'builtinTunerName', + 'assessor': 'builtinAssessorName', + 'advisor': 'builtinAdvisorName' + } + self.builtin_name_schema = {} + for k, n in self.builtin_keys.items(): + self.builtin_name_schema[k] = {Optional(n): setChoice(n, *get_all_builtin_names(k+'s'))} + + self.customized_keys = set(['codeDir', 'classFileName', 'className']) + + def validate_class_args(self, class_args, algo_type, builtin_name): + if not builtin_name or not class_args: + return + meta = get_builtin_algo_meta(algo_type+'s', builtin_name) + if meta and 'accept_class_args' in meta and meta['accept_class_args'] == False: + raise SchemaError('classArgs is not allowed.') + + validator = create_validator_instance(algo_type+'s', builtin_name) + if validator: + try: + validator.validate_class_args(**class_args) + except Exception as e: + raise SchemaError(str(e)) + + def missing_customized_keys(self, data): + return self.customized_keys - set(data.keys()) + + def validate_extras(self, data, algo_type): + builtin_key = self.builtin_keys[algo_type] + if (builtin_key in data) and (set(data.keys()) & self.customized_keys): + raise SchemaError('{} and {} cannot be specified at the same time.'.format( + builtin_key, set(data.keys()) & self.customized_keys + )) + + if self.missing_customized_keys(data) and builtin_key not in data: + raise SchemaError('Either customized {} ({}) or builtin {} ({}) must be set.'.format( + algo_type, self.customized_keys, 
algo_type, builtin_key)) + + if not self.missing_customized_keys(data): + class_file_name = os.path.join(data['codeDir'], data['classFileName']) + if not os.path.isfile(class_file_name): + raise SchemaError('classFileName {} not found.'.format(class_file_name)) + + builtin_name = data.get(builtin_key) + class_args = data.get('classArgs') + self.validate_class_args(class_args, algo_type, builtin_name) + + def validate(self, data): + self.algo_schema.update(self.builtin_name_schema[self.algo_type]) + Schema(self.algo_schema).validate(data) + self.validate_extras(data, self.algo_type) + common_schema = { 'authorName': setType('authorName', str), 'experimentName': setType('experimentName', str), @@ -44,196 +127,15 @@ def setPathCheck(key): Optional('logLevel'): setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'), Optional('logCollection'): setChoice('logCollection', 'http', 'none'), 'useAnnotation': setType('useAnnotation', bool), - Optional('tuner'): dict, - Optional('advisor'): dict, - Optional('assessor'): dict, + Optional('tuner'): AlgoSchema('tuner'), + Optional('advisor'): AlgoSchema('advisor'), + Optional('assessor'): AlgoSchema('assessor'), Optional('localConfig'): { Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), Optional('useActiveGpu'): setType('useActiveGpu', bool) } } -tuner_schema_dict = { - 'Anneal': { - 'builtinTunerName': 'Anneal', - Optional('classArgs'): { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'SMAC': { - 'builtinTunerName': 'SMAC', - Optional('classArgs'): { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), 
- Optional('config_dedup'): setType('config_dedup', bool) - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - ('Evolution'): { - 'builtinTunerName': setChoice('builtinTunerName', 'Evolution'), - Optional('classArgs'): { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('population_size'): setNumberRange('population_size', int, 0, 99999), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - ('BatchTuner', 'GridSearch', 'Random'): { - 'builtinTunerName': setChoice('builtinTunerName', 'BatchTuner', 'GridSearch', 'Random'), - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'TPE': { - 'builtinTunerName': 'TPE', - Optional('classArgs'): { - Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('parallel_optimize'): setType('parallel_optimize', bool), - Optional('constant_liar_type'): setChoice('constant_liar_type', 'min', 'max', 'mean') - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'NetworkMorphism': { - 'builtinTunerName': 'NetworkMorphism', - Optional('classArgs'): { - Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('task'): setChoice('task', 'cv', 'nlp', 'common'), - Optional('input_width'): setType('input_width', int), - Optional('input_channel'): 
setType('input_channel', int), - Optional('n_output_node'): setType('n_output_node', int), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'MetisTuner': { - 'builtinTunerName': 'MetisTuner', - Optional('classArgs'): { - Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('no_resampling'): setType('no_resampling', bool), - Optional('no_candidates'): setType('no_candidates', bool), - Optional('selection_num_starting_points'): setType('selection_num_starting_points', int), - Optional('cold_start_num'): setType('cold_start_num', int), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'GPTuner': { - 'builtinTunerName': 'GPTuner', - Optional('classArgs'): { - Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('utility'): setChoice('utility', 'ei', 'ucb', 'poi'), - Optional('kappa'): setType('kappa', float), - Optional('xi'): setType('xi', float), - Optional('nu'): setType('nu', float), - Optional('alpha'): setType('alpha', float), - Optional('cold_start_num'): setType('cold_start_num', int), - Optional('selection_num_warm_up'): setType('selection_num_warm_up', int), - Optional('selection_num_starting_points'): setType('selection_num_starting_points', int), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'PPOTuner': { - 'builtinTunerName': 'PPOTuner', - 'classArgs': { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('trials_per_update'): 
setNumberRange('trials_per_update', int, 0, 99999), - Optional('epochs_per_update'): setNumberRange('epochs_per_update', int, 0, 99999), - Optional('minibatch_size'): setNumberRange('minibatch_size', int, 0, 99999), - Optional('ent_coef'): setType('ent_coef', float), - Optional('lr'): setType('lr', float), - Optional('vf_coef'): setType('vf_coef', float), - Optional('max_grad_norm'): setType('max_grad_norm', float), - Optional('gamma'): setType('gamma', float), - Optional('lam'): setType('lam', float), - Optional('cliprange'): setType('cliprange', float), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'PBTTuner': { - 'builtinTunerName': 'PBTTuner', - 'classArgs': { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('all_checkpoint_dir'): setType('all_checkpoint_dir', str), - Optional('population_size'): setNumberRange('population_size', int, 0, 99999), - Optional('factors'): setType('factors', tuple), - Optional('fraction'): setType('fraction', float), - }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'customized': { - 'codeDir': setPathCheck('codeDir'), - 'classFileName': setType('classFileName', str), - 'className': setType('className', str), - Optional('classArgs'): dict, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - } -} - -advisor_schema_dict = { - 'Hyperband':{ - 'builtinAdvisorName': Or('Hyperband'), - 'classArgs': { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('R'): 
setType('R', int), - Optional('eta'): setType('eta', int) - }, - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'BOHB':{ - 'builtinAdvisorName': Or('BOHB'), - 'classArgs': { - 'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('min_budget'): setNumberRange('min_budget', int, 0, 9999), - Optional('max_budget'): setNumberRange('max_budget', int, 0, 9999), - Optional('eta'):setNumberRange('eta', int, 0, 9999), - Optional('min_points_in_model'): setNumberRange('min_points_in_model', int, 0, 9999), - Optional('top_n_percent'): setNumberRange('top_n_percent', int, 1, 99), - Optional('num_samples'): setNumberRange('num_samples', int, 1, 9999), - Optional('random_fraction'): setNumberRange('random_fraction', float, 0, 9999), - Optional('bandwidth_factor'): setNumberRange('bandwidth_factor', float, 0, 9999), - Optional('min_bandwidth'): setNumberRange('min_bandwidth', float, 0, 9999), - }, - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - }, - 'customized':{ - 'codeDir': setPathCheck('codeDir'), - 'classFileName': setType('classFileName', str), - 'className': setType('className', str), - Optional('classArgs'): dict, - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - } -} - -assessor_schema_dict = { - 'Medianstop': { - 'builtinAssessorName': 'Medianstop', - Optional('classArgs'): { - Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('start_step'): setNumberRange('start_step', int, 0, 9999), - }, - }, - 'Curvefitting': { - 'builtinAssessorName': 'Curvefitting', - Optional('classArgs'): { - 'epoch_num': setNumberRange('epoch_num', int, 0, 9999), - Optional('start_step'): setNumberRange('start_step', int, 0, 9999), - Optional('threshold'): setNumberRange('threshold', 
float, 0, 9999), - Optional('gap'): setNumberRange('gap', int, 1, 9999), - }, - }, - 'customized': { - 'codeDir': setPathCheck('codeDir'), - 'classFileName': setType('classFileName', str), - 'className': setType('className', str), - Optional('classArgs'): dict, - } -} common_trial_schema = { 'trial':{ @@ -441,7 +343,7 @@ def setPathCheck(key): } machine_list_schema = { - Optional('machineList'):[Or( + 'machineList':[Or( { 'ip': setType('ip', str), Optional('port'): setNumberRange('port', int, 1, 65535), @@ -463,16 +365,129 @@ def setPathCheck(key): })] } -LOCAL_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema}) +training_service_schema_dict = { + 'local': Schema({**common_schema, **common_trial_schema}), + 'remote': Schema({**common_schema, **common_trial_schema, **machine_list_schema}), + 'pai': Schema({**common_schema, **pai_trial_schema, **pai_config_schema}), + 'paiYarn': Schema({**common_schema, **pai_yarn_trial_schema, **pai_yarn_config_schema}), + 'kubeflow': Schema({**common_schema, **kubeflow_trial_schema, **kubeflow_config_schema}), + 'frameworkcontroller': Schema({**common_schema, **frameworkcontroller_trial_schema, **frameworkcontroller_config_schema}), + 'dlts': Schema({**common_schema, **dlts_trial_schema, **dlts_config_schema}), +} + +class NNIConfigSchema: + def validate(self, data): + train_service = data['trainingServicePlatform'] + Schema(common_schema['trainingServicePlatform']).validate(train_service) + train_service_schema = training_service_schema_dict[train_service] + train_service_schema.validate(data) + self.validate_extras(data) + + def validate_extras(self, experiment_config): + self.validate_tuner_adivosr_assessor(experiment_config) + self.validate_pai_trial_conifg(experiment_config) + self.validate_kubeflow_operators(experiment_config) + self.validate_eth0_device(experiment_config) + + def validate_tuner_adivosr_assessor(self, experiment_config): + if experiment_config.get('advisor'): + if 
experiment_config.get('assessor') or experiment_config.get('tuner'): + raise SchemaError('advisor could not be set with assessor or tuner simultaneously!') + self.validate_annotation_content(experiment_config, 'advisor', 'builtinAdvisorName') + else: + if not experiment_config.get('tuner'): + raise SchemaError('Please provide tuner spec!') + self.validate_annotation_content(experiment_config, 'tuner', 'builtinTunerName') + + def validate_search_space_content(self, experiment_config): + '''Validate searchspace content, + if the searchspace file is not json format or its values does not contain _type and _value which must be specified, + it will not be a valid searchspace file''' + try: + search_space_content = json.load(open(experiment_config.get('searchSpacePath'), 'r')) + for value in search_space_content.values(): + if not value.get('_type') or not value.get('_value'): + raise SchemaError('please use _type and _value to specify searchspace!') + except Exception as e: + raise SchemaError('searchspace file is not a valid json format! 
' + str(e)) -REMOTE_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema, **machine_list_schema}) + def validate_kubeflow_operators(self, experiment_config): + '''Validate whether the kubeflow operators are valid''' + if experiment_config.get('kubeflowConfig'): + if experiment_config.get('kubeflowConfig').get('operator') == 'tf-operator': + if experiment_config.get('trial').get('master') is not None: + raise SchemaError('kubeflow with tf-operator can not set master') + if experiment_config.get('trial').get('worker') is None: + raise SchemaError('kubeflow with tf-operator must set worker') + elif experiment_config.get('kubeflowConfig').get('operator') == 'pytorch-operator': + if experiment_config.get('trial').get('ps') is not None: + raise SchemaError('kubeflow with pytorch-operator can not set ps') + if experiment_config.get('trial').get('master') is None: + raise SchemaError('kubeflow with pytorch-operator must set master') -PAI_CONFIG_SCHEMA = Schema({**common_schema, **pai_trial_schema, **pai_config_schema}) + if experiment_config.get('kubeflowConfig').get('storage') == 'nfs': + if experiment_config.get('kubeflowConfig').get('nfs') is None: + raise SchemaError('please set nfs configuration!') + elif experiment_config.get('kubeflowConfig').get('storage') == 'azureStorage': + if experiment_config.get('kubeflowConfig').get('azureStorage') is None: + raise SchemaError('please set azureStorage configuration!') + elif experiment_config.get('kubeflowConfig').get('storage') is None: + if experiment_config.get('kubeflowConfig').get('azureStorage'): + raise SchemaError('please set storage type!') -PAI_YARN_CONFIG_SCHEMA = Schema({**common_schema, **pai_yarn_trial_schema, **pai_yarn_config_schema}) + def validate_annotation_content(self, experiment_config, spec_key, builtin_name): + ''' + Valid whether useAnnotation and searchSpacePath is coexist + spec_key: 'advisor' or 'tuner' + builtin_name: 'builtinAdvisorName' or 'builtinTunerName' + ''' + if 
experiment_config.get('useAnnotation'): + if experiment_config.get('searchSpacePath'): + raise SchemaError('If you set useAnnotation=true, please leave searchSpacePath empty') + else: + # validate searchSpaceFile + if experiment_config[spec_key].get(builtin_name) == 'NetworkMorphism': + return + if experiment_config[spec_key].get(builtin_name): + if experiment_config.get('searchSpacePath') is None: + raise SchemaError('Please set searchSpacePath!') + self.validate_search_space_content(experiment_config) -DLTS_CONFIG_SCHEMA = Schema({**common_schema, **dlts_trial_schema, **dlts_config_schema}) + def validate_pai_config_path(self, experiment_config): + '''validate paiConfigPath field''' + if experiment_config.get('trainingServicePlatform') == 'pai': + if experiment_config.get('trial', {}).get('paiConfigPath'): + # validate commands + pai_config = get_yml_content(experiment_config['trial']['paiConfigPath']) + taskRoles_dict = pai_config.get('taskRoles') + if not taskRoles_dict: + raise SchemaError('Please set taskRoles in paiConfigPath config file!') + else: + pai_trial_fields_required_list = ['image', 'gpuNum', 'cpuNum', 'memoryMB', 'paiStoragePlugin', 'command'] + for trial_field in pai_trial_fields_required_list: + if experiment_config['trial'].get(trial_field) is None: + raise SchemaError('Please set {0} in trial configuration,\ + or set additional pai configuration file path in paiConfigPath!'.format(trial_field)) -KUBEFLOW_CONFIG_SCHEMA = Schema({**common_schema, **kubeflow_trial_schema, **kubeflow_config_schema}) + def validate_pai_trial_conifg(self, experiment_config): + '''validate the trial config in pai platform''' + if experiment_config.get('trainingServicePlatform') in ['pai', 'paiYarn']: + if experiment_config.get('trial').get('shmMB') and \ + experiment_config['trial']['shmMB'] > experiment_config['trial']['memoryMB']: + raise SchemaError('shmMB should be no more than memoryMB!') + #backward compatibility + warning_information = '{0} is not supported in 
NNI anymore, please remove the field in config file!\ + please refer https://github.com/microsoft/nni/blob/master/docs/en_US/TrainingService/PaiMode.md#run-an-experiment\ + for the practices of how to get data and output model in trial code' + if experiment_config.get('trial').get('dataDir'): + print_warning(warning_information.format('dataDir')) + if experiment_config.get('trial').get('outputDir'): + print_warning(warning_information.format('outputDir')) + self.validate_pai_config_path(experiment_config) -FRAMEWORKCONTROLLER_CONFIG_SCHEMA = Schema({**common_schema, **frameworkcontroller_trial_schema, **frameworkcontroller_config_schema}) + def validate_eth0_device(self, experiment_config): + '''validate whether the machine has eth0 device''' + if experiment_config.get('trainingServicePlatform') not in ['local'] \ + and not experiment_config.get('nniManagerIp') \ + and 'eth0' not in netifaces.interfaces(): + raise SchemaError('This machine does not contain eth0 network device, please set nniManagerIp in config file!') diff --git a/tools/nni_cmd/constants.py b/tools/nni_cmd/constants.py index 32889c87e6..5a37c3a1f1 100644 --- a/tools/nni_cmd/constants.py +++ b/tools/nni_cmd/constants.py @@ -6,14 +6,11 @@ NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl') -ERROR_INFO = 'ERROR: %s' - -NORMAL_INFO = 'INFO: %s' - -WARNING_INFO = 'WARNING: %s' +ERROR_INFO = 'ERROR: ' +NORMAL_INFO = 'INFO: ' +WARNING_INFO = 'WARNING: ' DEFAULT_REST_PORT = 8080 - REST_TIME_OUT = 20 EXPERIMENT_SUCCESS_INFO = Fore.GREEN + 'Successfully started experiment!\n' + Fore.RESET + \ @@ -62,10 +59,25 @@ TRIAL_MONITOR_TAIL = '-------------------------------------------------------------------------------------\n\n\n' -PACKAGE_REQUIREMENTS = { - 'SMAC': 'smac_tuner', - 'BOHB': 'bohb_advisor', - 'PPOTuner': 'ppo_tuner' +INSTALLABLE_PACKAGE_META = { + 'SMAC': { + 'type': 'tuner', + 'class_name': 'nni.smac_tuner.smac_tuner.SMACTuner', + 'code_sub_dir': 'smac_tuner', + 
'class_args_validator': 'nni.smac_tuner.smac_tuner.SMACClassArgsValidator' + }, + 'BOHB': { + 'type': 'advisor', + 'class_name': 'nni.bohb_advisor.bohb_advisor.BOHB', + 'code_sub_dir': 'bohb_advisor', + 'class_args_validator': 'nni.bohb_advisor.bohb_advisor.BOHBClassArgsValidator' + }, + 'PPOTuner': { + 'type': 'tuner', + 'class_name': 'nni.ppo_tuner.ppo_tuner.PPOTuner', + 'code_sub_dir': 'ppo_tuner', + 'class_args_validator': 'nni.ppo_tuner.ppo_tuner.PPOClassArgsValidator' + } } TUNERS_SUPPORTING_IMPORT_DATA = { @@ -83,14 +95,6 @@ 'Hyperband' } -COLOR_RED_FORMAT = Fore.RED + '%s' - -COLOR_GREEN_FORMAT = Fore.GREEN + '%s' - -COLOR_YELLOW_FORMAT = Fore.YELLOW + '%s' - SCHEMA_TYPE_ERROR = '%s should be %s type!' - SCHEMA_RANGE_ERROR = '%s should be in range of %s!' - SCHEMA_PATH_ERROR = '%s path not exist!' diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py index 6ea6eb348f..cd9a89a992 100644 --- a/tools/nni_cmd/launcher.py +++ b/tools/nni_cmd/launcher.py @@ -10,14 +10,15 @@ import tempfile from subprocess import Popen, check_call, CalledProcessError, PIPE, STDOUT from nni_annotation import expand_annotations, generate_search_space -from nni.constants import ModuleName, AdvisorModuleName +from nni.package_utils import get_builtin_module_class_name, get_nni_installation_path from .launcher_utils import validate_all_content from .rest_utils import rest_put, rest_post, check_rest_server, check_response from .url_utils import cluster_metadata_url, experiment_url, get_local_urls from .config_utils import Config, Experiments from .common_utils import get_yml_content, get_json_content, print_error, print_normal, \ - detect_port, get_user, get_nni_installation_path -from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS + detect_port, get_user + +from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, INSTALLABLE_PACKAGE_META from 
.command_utils import check_output_command, kill_command from .nnictl_utils import update_experiment @@ -52,6 +53,9 @@ def start_rest_server(port, platform, mode, config_file_name, foreground=False, print_normal('Starting restful server...') entry_dir = get_nni_installation_path() + if (not entry_dir) or (not os.path.exists(entry_dir)): + print_error('Fail to find nni under python library') + exit(1) entry_file = os.path.join(entry_dir, 'main.js') node_command = 'node' @@ -390,10 +394,10 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen package_name, module_name = None, None if experiment_config.get('tuner') and experiment_config['tuner'].get('builtinTunerName'): package_name = experiment_config['tuner']['builtinTunerName'] - module_name = ModuleName.get(package_name) + module_name, _ = get_builtin_module_class_name('tuners', package_name) elif experiment_config.get('advisor') and experiment_config['advisor'].get('builtinAdvisorName'): package_name = experiment_config['advisor']['builtinAdvisorName'] - module_name = AdvisorModuleName.get(package_name) + module_name, _ = get_builtin_module_class_name('advisors', package_name) if package_name and module_name: try: stdout_full_path, stderr_full_path = get_log_path(config_file_name) @@ -402,7 +406,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen except CalledProcessError: print_error('some errors happen when import package %s.' 
%(package_name)) print_log_content(config_file_name) - if package_name in PACKAGE_REQUIREMENTS: + if package_name in INSTALLABLE_PACKAGE_META: print_error('If %s is not installed, it should be installed through '\ '\'nnictl package install --name %s\''%(package_name, package_name)) exit(1) @@ -502,7 +506,11 @@ def create_experiment(args): print_error('Please set correct config path!') exit(1) experiment_config = get_yml_content(config_path) - validate_all_content(experiment_config, config_path) + try: + validate_all_content(experiment_config, config_path) + except Exception as e: + print_error(e) + exit(1) nni_config.set_config('experimentConfig', experiment_config) nni_config.set_config('restServerPort', args.port) diff --git a/tools/nni_cmd/launcher_utils.py b/tools/nni_cmd/launcher_utils.py index 8be9a8bf0e..48dd2779a3 100644 --- a/tools/nni_cmd/launcher_utils.py +++ b/tools/nni_cmd/launcher_utils.py @@ -2,14 +2,9 @@ # Licensed under the MIT license. import os -import json -import netifaces from schema import SchemaError -from schema import Schema -from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, PAI_YARN_CONFIG_SCHEMA, \ - DLTS_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA, FRAMEWORKCONTROLLER_CONFIG_SCHEMA, \ - tuner_schema_dict, advisor_schema_dict, assessor_schema_dict -from .common_utils import print_error, print_warning, print_normal, get_yml_content +from .config_schema import NNIConfigSchema +from .common_utils import print_normal def expand_path(experiment_config, key): '''Change '~' to user home directory''' @@ -27,12 +22,10 @@ def parse_time(time): '''Change the time to seconds''' unit = time[-1] if unit not in ['s', 'm', 'h', 'd']: - print_error('the unit of time could only from {s, m, h, d}') - exit(1) + raise SchemaError('the unit of time could only from {s, m, h, d}') time = time[:-1] if not time.isdigit(): - print_error('time format error!') - exit(1) + raise SchemaError('time format error!') parse_dict = {'s':1, 
'm':60, 'h':3600, 'd':86400} return int(time) * parse_dict[unit] @@ -101,100 +94,7 @@ def parse_path(experiment_config, config_path): if experiment_config['trial'].get('paiConfigPath'): parse_relative_path(root_path, experiment_config['trial'], 'paiConfigPath') -def validate_search_space_content(experiment_config): - '''Validate searchspace content, - if the searchspace file is not json format or its values does not contain _type and _value which must be specified, - it will not be a valid searchspace file''' - try: - search_space_content = json.load(open(experiment_config.get('searchSpacePath'), 'r')) - for value in search_space_content.values(): - if not value.get('_type') or not value.get('_value'): - print_error('please use _type and _value to specify searchspace!') - exit(1) - except: - print_error('searchspace file is not a valid json format!') - exit(1) - -def validate_kubeflow_operators(experiment_config): - '''Validate whether the kubeflow operators are valid''' - if experiment_config.get('kubeflowConfig'): - if experiment_config.get('kubeflowConfig').get('operator') == 'tf-operator': - if experiment_config.get('trial').get('master') is not None: - print_error('kubeflow with tf-operator can not set master') - exit(1) - if experiment_config.get('trial').get('worker') is None: - print_error('kubeflow with tf-operator must set worker') - exit(1) - elif experiment_config.get('kubeflowConfig').get('operator') == 'pytorch-operator': - if experiment_config.get('trial').get('ps') is not None: - print_error('kubeflow with pytorch-operator can not set ps') - exit(1) - if experiment_config.get('trial').get('master') is None: - print_error('kubeflow with pytorch-operator must set master') - exit(1) - - if experiment_config.get('kubeflowConfig').get('storage') == 'nfs': - if experiment_config.get('kubeflowConfig').get('nfs') is None: - print_error('please set nfs configuration!') - exit(1) - elif experiment_config.get('kubeflowConfig').get('storage') == 'azureStorage': 
- if experiment_config.get('kubeflowConfig').get('azureStorage') is None: - print_error('please set azureStorage configuration!') - exit(1) - elif experiment_config.get('kubeflowConfig').get('storage') is None: - if experiment_config.get('kubeflowConfig').get('azureStorage'): - print_error('please set storage type!') - exit(1) - -def validate_common_content(experiment_config): - '''Validate whether the common values in experiment_config is valid''' - if not experiment_config.get('trainingServicePlatform') or \ - experiment_config.get('trainingServicePlatform') not in [ - 'local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts' - ]: - print_error('Please set correct trainingServicePlatform!') - exit(1) - schema_dict = { - 'local': LOCAL_CONFIG_SCHEMA, - 'remote': REMOTE_CONFIG_SCHEMA, - 'pai': PAI_CONFIG_SCHEMA, - 'paiYarn': PAI_YARN_CONFIG_SCHEMA, - 'kubeflow': KUBEFLOW_CONFIG_SCHEMA, - 'frameworkcontroller': FRAMEWORKCONTROLLER_CONFIG_SCHEMA, - 'dlts': DLTS_CONFIG_SCHEMA, - } - separate_schema_dict = { - 'tuner': tuner_schema_dict, - 'advisor': advisor_schema_dict, - 'assessor': assessor_schema_dict - } - separate_builtInName_dict = { - 'tuner': 'builtinTunerName', - 'advisor': 'builtinAdvisorName', - 'assessor': 'builtinAssessorName' - } - try: - schema_dict.get(experiment_config['trainingServicePlatform']).validate(experiment_config) - for separate_key in separate_schema_dict.keys(): - if experiment_config.get(separate_key): - if experiment_config[separate_key].get(separate_builtInName_dict[separate_key]): - validate = False - for key in separate_schema_dict[separate_key].keys(): - if key.__contains__(experiment_config[separate_key][separate_builtInName_dict[separate_key]]): - Schema({**separate_schema_dict[separate_key][key]}).validate(experiment_config[separate_key]) - validate = True - break - if not validate: - print_error('%s %s error!' 
% (separate_key, separate_builtInName_dict[separate_key])) - exit(1) - else: - Schema({**separate_schema_dict[separate_key]['customized']}).validate(experiment_config[separate_key]) - except SchemaError as error: - print_error('Your config file is not correct, please check your config file content!') - print_error(error.code) - exit(1) - - #set default value +def set_default_values(experiment_config): if experiment_config.get('maxExecDuration') is None: experiment_config['maxExecDuration'] = '999d' if experiment_config.get('maxTrialNum') is None: @@ -204,124 +104,11 @@ def validate_common_content(experiment_config): if experiment_config['machineList'][index].get('port') is None: experiment_config['machineList'][index]['port'] = 22 -def validate_customized_file(experiment_config, spec_key): - ''' - check whether the file of customized tuner/assessor/advisor exists - spec_key: 'tuner', 'assessor', 'advisor' - ''' - if experiment_config[spec_key].get('codeDir') and \ - experiment_config[spec_key].get('classFileName') and \ - experiment_config[spec_key].get('className'): - if not os.path.exists(os.path.join( - experiment_config[spec_key]['codeDir'], - experiment_config[spec_key]['classFileName'])): - print_error('%s file directory is not valid!'%(spec_key)) - exit(1) - else: - print_error('%s file directory is not valid!'%(spec_key)) - exit(1) - -def parse_tuner_content(experiment_config): - '''Validate whether tuner in experiment_config is valid''' - if not experiment_config['tuner'].get('builtinTunerName'): - validate_customized_file(experiment_config, 'tuner') - -def parse_assessor_content(experiment_config): - '''Validate whether assessor in experiment_config is valid''' - if experiment_config.get('assessor'): - if not experiment_config['assessor'].get('builtinAssessorName'): - validate_customized_file(experiment_config, 'assessor') - -def parse_advisor_content(experiment_config): - '''Validate whether advisor in experiment_config is valid''' - if not 
experiment_config['advisor'].get('builtinAdvisorName'): - validate_customized_file(experiment_config, 'advisor') - -def validate_annotation_content(experiment_config, spec_key, builtin_name): - ''' - Valid whether useAnnotation and searchSpacePath is coexist - spec_key: 'advisor' or 'tuner' - builtin_name: 'builtinAdvisorName' or 'builtinTunerName' - ''' - if experiment_config.get('useAnnotation'): - if experiment_config.get('searchSpacePath'): - print_error('If you set useAnnotation=true, please leave searchSpacePath empty') - exit(1) - else: - # validate searchSpaceFile - if experiment_config[spec_key].get(builtin_name) == 'NetworkMorphism': - return - if experiment_config[spec_key].get(builtin_name): - if experiment_config.get('searchSpacePath') is None: - print_error('Please set searchSpacePath!') - exit(1) - validate_search_space_content(experiment_config) - -def validate_machine_list(experiment_config): - '''Validate machine list''' - if experiment_config.get('trainingServicePlatform') == 'remote' and experiment_config.get('machineList') is None: - print_error('Please set machineList!') - exit(1) - -def validate_pai_config_path(experiment_config): - '''validate paiConfigPath field''' - if experiment_config.get('trainingServicePlatform') == 'pai': - if experiment_config.get('trial', {}).get('paiConfigPath'): - # validate commands - pai_config = get_yml_content(experiment_config['trial']['paiConfigPath']) - taskRoles_dict = pai_config.get('taskRoles') - if not taskRoles_dict: - print_error('Please set taskRoles in paiConfigPath config file!') - exit(1) - else: - pai_trial_fields_required_list = ['image', 'gpuNum', 'cpuNum', 'memoryMB', 'paiStorageConfigName', 'command'] - for trial_field in pai_trial_fields_required_list: - if experiment_config['trial'].get(trial_field) is None: - print_error('Please set {0} in trial configuration,\ - or set additional pai configuration file path in paiConfigPath!'.format(trial_field)) - exit(1) - -def 
validate_pai_trial_conifg(experiment_config): - '''validate the trial config in pai platform''' - if experiment_config.get('trainingServicePlatform') in ['pai', 'paiYarn']: - if experiment_config.get('trial').get('shmMB') and \ - experiment_config['trial']['shmMB'] > experiment_config['trial']['memoryMB']: - print_error('shmMB should be no more than memoryMB!') - exit(1) - #backward compatibility - warning_information = '{0} is not supported in NNI anymore, please remove the field in config file!\ - please refer https://github.com/microsoft/nni/blob/master/docs/en_US/TrainingService/PaiMode.md#run-an-experiment\ - for the practices of how to get data and output model in trial code' - if experiment_config.get('trial').get('dataDir'): - print_warning(warning_information.format('dataDir')) - if experiment_config.get('trial').get('outputDir'): - print_warning(warning_information.format('outputDir')) - validate_pai_config_path(experiment_config) - -def validate_eth0_device(experiment_config): - '''validate whether the machine has eth0 device''' - if experiment_config.get('trainingServicePlatform') not in ['local'] \ - and not experiment_config.get('nniManagerIp') \ - and 'eth0' not in netifaces.interfaces(): - print_error('This machine does not contain eth0 network device, please set nniManagerIp in config file!') - exit(1) - def validate_all_content(experiment_config, config_path): '''Validate whether experiment_config is valid''' parse_path(experiment_config, config_path) - validate_common_content(experiment_config) - validate_eth0_device(experiment_config) - validate_pai_trial_conifg(experiment_config) + set_default_values(experiment_config) + + NNIConfigSchema().validate(experiment_config) + experiment_config['maxExecDuration'] = parse_time(experiment_config['maxExecDuration']) - if experiment_config.get('advisor'): - if experiment_config.get('assessor') or experiment_config.get('tuner'): - print_error('advisor could not be set with assessor or tuner 
simultaneously!') - exit(1) - parse_advisor_content(experiment_config) - validate_annotation_content(experiment_config, 'advisor', 'builtinAdvisorName') - else: - if not experiment_config.get('tuner'): - raise Exception('Please provide tuner spec!') - parse_tuner_content(experiment_config) - parse_assessor_content(experiment_config) - validate_annotation_content(experiment_config, 'tuner', 'builtinTunerName') diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py index 030c9f9352..6a2991fe50 100644 --- a/tools/nni_cmd/nnictl.py +++ b/tools/nni_cmd/nnictl.py @@ -12,7 +12,7 @@ log_trial, experiment_clean, platform_clean, experiment_list, \ monitor_experiment, export_trials_data, trial_codegen, webui_url, \ get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas -from .package_management import package_install, package_show +from .package_management import package_install, package_uninstall, package_show, package_list from .constants import DEFAULT_REST_PORT from .tensorboard_utils import start_tensorboard, stop_tensorboard init(autoreset=True) @@ -196,11 +196,22 @@ def parse_args(): # add subparsers for parser_package parser_package_subparsers = parser_package.add_subparsers() parser_package_install = parser_package_subparsers.add_parser('install', help='install packages') - parser_package_install.add_argument('--name', '-n', dest='name', help='package name to be installed') + parser_package_install.add_argument('source', nargs='?', help='installation source, can be a directory or whl file') + parser_package_install.add_argument('--name', '-n', dest='name', help='package name to be installed', required=False) parser_package_install.set_defaults(func=package_install) + + parser_package_uninstall = parser_package_subparsers.add_parser('uninstall', help='uninstall packages') + parser_package_uninstall.add_argument('name', nargs=1, help='package name to be uninstalled') + parser_package_uninstall.set_defaults(func=package_uninstall) + 
parser_package_show = parser_package_subparsers.add_parser('show', help='show the information of packages') + parser_package_show.add_argument('name', nargs=1, help='builtin name of the package') parser_package_show.set_defaults(func=package_show) + parser_package_list = parser_package_subparsers.add_parser('list', help='list installed packages') + parser_package_list.add_argument('--all', action='store_true', help='list all builtin packages') + parser_package_list.set_defaults(func=package_list) + #parse tensorboard command parser_tensorboard = subparsers.add_parser('tensorboard', help='manage tensorboard') parser_tensorboard_subparsers = parser_tensorboard.add_subparsers() diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py index 7bccc1085a..bbbf54fcc6 100644 --- a/tools/nni_cmd/nnictl_utils.py +++ b/tools/nni_cmd/nnictl_utils.py @@ -13,13 +13,14 @@ from pathlib import Path from subprocess import Popen from pyhdfs import HdfsClient +from nni.package_utils import get_nni_installation_path from nni_annotation import expand_annotations from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url from .config_utils import Config, Experiments from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT -from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, get_nni_installation_path +from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content from .command_utils import check_output_command, kill_command from .ssh_utils import create_ssh_sftp_client, remove_remote_directory diff --git a/tools/nni_cmd/package_management.py b/tools/nni_cmd/package_management.py index 604690a10e..942665b97a 100644 --- 
a/tools/nni_cmd/package_management.py +++ b/tools/nni_cmd/package_management.py @@ -2,23 +2,183 @@ # Licensed under the MIT license. import os +from collections import defaultdict +import json +import pkginfo import nni -from .constants import PACKAGE_REQUIREMENTS -from .common_utils import print_error -from .command_utils import install_requirements_command +from nni.package_utils import read_installed_package_meta, get_installed_package_meta, \ + write_package_meta, get_builtin_algo_meta, get_not_installable_builtin_names, ALGO_TYPES -def process_install(package_name): - if PACKAGE_REQUIREMENTS.get(package_name) is None: - print_error('{0} is not supported!' % package_name) - else: - requirements_path = os.path.join(nni.__path__[0], PACKAGE_REQUIREMENTS[package_name]) - install_requirements_command(requirements_path) +from .constants import INSTALLABLE_PACKAGE_META +from .common_utils import print_error, print_green +from .command_utils import install_requirements_command, call_pip_install, call_pip_uninstall + +PACKAGE_TYPES = ['tuner', 'assessor', 'advisor'] + +def install_by_name(package_name): + if package_name not in INSTALLABLE_PACKAGE_META: + raise RuntimeError('{} is not found in installable packages!'.format(package_name)) + + requirements_path = os.path.join(nni.__path__[0], INSTALLABLE_PACKAGE_META[package_name]['code_sub_dir'], 'requirements.txt') + assert os.path.exists(requirements_path) + + return install_requirements_command(requirements_path) def package_install(args): '''install packages''' - process_install(args.name) + installed = False + try: + if args.name: + if install_by_name(args.name) == 0: + package_meta = {} + package_meta['type'] = INSTALLABLE_PACKAGE_META[args.name]['type'] + package_meta['name'] = args.name + package_meta['class_name'] = INSTALLABLE_PACKAGE_META[args.name]['class_name'] + package_meta['class_args_validator'] = INSTALLABLE_PACKAGE_META[args.name]['class_args_validator'] + save_package_meta_data(package_meta) + 
print_green('{} installed!'.format(args.name)) + installed = True + else: + package_meta = get_nni_meta(args.source) + if package_meta: + if call_pip_install(args.source) == 0: + save_package_meta_data(package_meta) + print_green('{} installed!'.format(package_meta['name'])) + installed = True + except Exception as e: + print_error(e) + if not installed: + print_error('installation failed!') + +def package_uninstall(args): + '''uninstall packages''' + name = args.name[0] + if name in get_not_installable_builtin_names(): + print_error('{} can not be uninstalled!'.format(name)) + exit(1) + meta = get_installed_package_meta(None, name) + if meta is None: + print_error('package {} not found!'.format(name)) + return + if 'installed_package' in meta: + call_pip_uninstall(meta['installed_package']) + if remove_package_meta_data(name): + print_green('{} uninstalled sucessfully!'.format(name)) + else: + print_error('Failed to uninstall {}!'.format(name)) def package_show(args): - '''show all packages''' - print(' '.join(PACKAGE_REQUIREMENTS.keys())) + '''show specified packages''' + builtin_name = args.name[0] + meta = get_builtin_algo_meta(builtin_name=builtin_name) + if meta: + print(json.dumps(meta, indent=4)) + else: + print_error('package {} not found'.format(builtin_name)) + +def print_package_list(meta): + print('+-----------------+------------+-----------+--------=-------------+------------------------------------------+') + print('| Name | Type | Installed | Class Name | Module Name |') + print('+-----------------+------------+-----------+----------------------+------------------------------------------+') + MAX_MODULE_NAME = 38 + for t in ['tuners', 'assessors', 'advisors']: + for p in meta[t]: + module_name = '.'.join(p['class_name'].split('.')[:-1]) + if len(module_name) > MAX_MODULE_NAME: + module_name = module_name[:MAX_MODULE_NAME-3] + '...' 
+ class_name = p['class_name'].split('.')[-1] + print('| {:15s} | {:10s} | {:9s} | {:20s} | {:40s} |'.format(p['name'], t, p['installed'], class_name, module_name[:38])) + print('+-----------------+------------+-----------+----------------------+------------------------------------------+') + +def package_list(args): + '''list all packages''' + if args.all: + meta = get_builtin_algo_meta() + else: + meta = read_installed_package_meta() + + installed_names = defaultdict(list) + for t in ['tuners', 'assessors', 'advisors']: + for p in meta[t]: + p['installed'] = 'Yes' + installed_names[t].append(p['name']) + for k, v in INSTALLABLE_PACKAGE_META.items(): + t = v['type']+'s' + if k not in installed_names[t]: + meta[t].append({ + 'name': k, + 'class_name': v['class_name'], + 'class_args_validator': v['class_args_validator'], + 'installed': 'No' + }) + + print_package_list(meta) + +def save_package_meta_data(meta_data): + assert meta_data['type'] in PACKAGE_TYPES + assert 'name' in meta_data + assert 'class_name' in meta_data + + config = read_installed_package_meta() + + if meta_data['name'] in [x['name'] for x in config[meta_data['type']+'s']]: + raise ValueError('name %s already installed' % meta_data['name']) + + package_meta = {k: meta_data[k] for k in ['name', 'class_name', 'class_args_validator'] if k in meta_data} + if 'package_name' in meta_data: + package_meta['installed_package'] = meta_data['package_name'] + config[meta_data['type']+'s'].append(package_meta) + write_package_meta(config) + +def remove_package_meta_data(name): + config = read_installed_package_meta() + + updated = False + for t in ALGO_TYPES: + for meta in config[t]: + if meta['name'] == name: + config[t].remove(meta) + updated = True + if updated: + write_package_meta(config) + return True + return False + +def get_nni_meta(source): + if not os.path.exists(source): + print_error('{} does not exist'.format(source)) + return None + + if os.path.isdir(source): + if not 
os.path.exists(os.path.join(source, 'setup.py')): + print_error('setup.py not found') + return None + pkg = pkginfo.Develop(source) + else: + if not source.endswith('.whl'): + print_error('File name {} must ends with \'.whl\''.format(source)) + return False + pkg = pkginfo.Wheel(source) + + classifiers = pkg.classifiers + meta = parse_classifiers(classifiers) + meta['package_name'] = pkg.name + return meta + +def parse_classifiers(classifiers): + parts = [] + for c in classifiers: + if c.startswith('NNI Package'): + parts = [x.strip() for x in c.split('::')] + break + if len(parts) < 4 or not all(parts): + raise ValueError('Can not find correct NNI meta data in package classifiers.') + meta = { + 'type': parts[1], + 'name': parts[2], + 'class_name': parts[3] + } + if len(parts) >= 5: + meta['class_args_validator'] = parts[4] + return meta diff --git a/tools/nni_cmd/tensorboard_utils.py b/tools/nni_cmd/tensorboard_utils.py index 5d3101605c..adf85f3a74 100644 --- a/tools/nni_cmd/tensorboard_utils.py +++ b/tools/nni_cmd/tensorboard_utils.py @@ -9,8 +9,8 @@ from .rest_utils import rest_get, check_rest_server_quick, check_response from .config_utils import Config, Experiments from .url_utils import trial_jobs_url, get_local_urls -from .constants import COLOR_GREEN_FORMAT, REST_TIME_OUT -from .common_utils import print_normal, print_error, detect_process, detect_port, check_tensorboard_version +from .constants import REST_TIME_OUT +from .common_utils import print_normal, print_error, print_green, detect_process, detect_port, check_tensorboard_version from .nnictl_utils import check_experiment_id, check_experiment_id from .ssh_utils import create_ssh_sftp_client, copy_remote_directory_to_local @@ -81,7 +81,8 @@ def start_tensorboard_process(args, nni_config, path_list, temp_nni_path): cmds = ['tensorboard', log_dir_cmd, format_tensorboard_log_path(path_list), '--port', str(args.port)] tensorboard_process = Popen(cmds, stdout=stdout_file, stderr=stderr_file) url_list = 
get_local_urls(args.port) - print_normal(COLOR_GREEN_FORMAT % 'Start tensorboard success!\n' + 'Tensorboard urls: ' + ' '.join(url_list)) + print_green('Start tensorboard success!') + print_normal('Tensorboard urls: ' + ' '.join(url_list)) tensorboard_process_pid_list = nni_config.get_config('tensorboardPidList') if tensorboard_process_pid_list is None: tensorboard_process_pid_list = [tensorboard_process.pid] diff --git a/tools/nni_cmd/tests/config_files/invalid/custom-tuner-1.yml b/tools/nni_cmd/tests/config_files/invalid/custom-tuner-1.yml new file mode 100644 index 0000000000..5fda582d43 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/custom-tuner-1.yml @@ -0,0 +1,25 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +# error: no className +tuner: + codeDir: ./ + classFileName: mytuner.py + +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/tools/nni_cmd/tests/config_files/invalid/custom-tuner-2.yml b/tools/nni_cmd/tests/config_files/invalid/custom-tuner-2.yml new file mode 100644 index 0000000000..dca12da101 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/custom-tuner-2.yml @@ -0,0 +1,27 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +# error: builtinTunerName conflicts with custom tuner settings +tuner: + codeDir: ./ + classFileName: mytuner.py + className: MyTuner + builtinTunerName: Random + +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git 
a/tools/nni_cmd/tests/config_files/invalid/mytuner.py b/tools/nni_cmd/tests/config_files/invalid/mytuner.py new file mode 100644 index 0000000000..4979cf5813 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/mytuner.py @@ -0,0 +1,5 @@ +from nni import Tuner + +class MyTuner(Tuner): + def __init__(self): + pass diff --git a/tools/nni_cmd/tests/config_files/invalid/no-tuner.yml b/tools/nni_cmd/tests/config_files/invalid/no-tuner.yml new file mode 100644 index 0000000000..3cc59ddaa3 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/no-tuner.yml @@ -0,0 +1,21 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +# error: no tuner or advisor +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/tools/nni_cmd/tests/config_files/invalid/search_space.json b/tools/nni_cmd/tests/config_files/invalid/search_space.json new file mode 100644 index 0000000000..c26cdce369 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/search_space.json @@ -0,0 +1,6 @@ +{ + "batch_size": {"_type":"choice", "_value": [16, 32, 64, 128]}, + "hidden_size":{"_type":"choice","_value":[128, 256, 512, 1024]}, + "lr":{"_type":"choice","_value":[0.0001, 0.001, 0.01, 0.1]}, + "momentum":{"_type":"uniform","_value":[0, 1]} +} diff --git a/tools/nni_cmd/tests/config_files/invalid/searchspace-path.yml b/tools/nni_cmd/tests/config_files/invalid/searchspace-path.yml new file mode 100644 index 0000000000..0ed1321194 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/searchspace-path.yml @@ -0,0 +1,24 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 + +# error: searchSpacePath can not be found +searchSpacePath: 
./wrong_search_space.json + +tuner: + builtinTunerName: Random +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 mnist.py --epochs 1 --batch_num 10 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/tools/nni_cmd/tests/config_files/invalid/tuner-wrong-key.yml b/tools/nni_cmd/tests/config_files/invalid/tuner-wrong-key.yml new file mode 100644 index 0000000000..35cae1ac32 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/tuner-wrong-key.yml @@ -0,0 +1,24 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +tuner: + # error: wrong key + wrongTunerKey: abc + +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/tools/nni_cmd/tests/config_files/invalid/wrong-class-args.yml b/tools/nni_cmd/tests/config_files/invalid/wrong-class-args.yml new file mode 100644 index 0000000000..fbbed974c8 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/wrong-class-args.yml @@ -0,0 +1,23 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +tuner: + builtinTunerName: Random +assessor: + builtinAssessorName: Medianstop + classArgs: + # wrong class args, should be detected by assessor validator + optimize_mode: aaaaaa +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/tools/nni_cmd/tests/config_files/invalid/wrong-training-service.yml b/tools/nni_cmd/tests/config_files/invalid/wrong-training-service.yml new file mode 100644 index 0000000000..0ce77c6160 --- 
/dev/null +++ b/tools/nni_cmd/tests/config_files/invalid/wrong-training-service.yml @@ -0,0 +1,23 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +tuner: + builtinTunerName: Random +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +# error: wrong training service name +trainingServicePlatform: local222 diff --git a/tools/nni_cmd/tests/config_files/valid/main.py b/tools/nni_cmd/tests/config_files/valid/main.py new file mode 100644 index 0000000000..a40d7a46c1 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/valid/main.py @@ -0,0 +1 @@ +print('my trial') diff --git a/tools/nni_cmd/tests/config_files/valid/search_space.json b/tools/nni_cmd/tests/config_files/valid/search_space.json new file mode 100644 index 0000000000..c26cdce369 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/valid/search_space.json @@ -0,0 +1,6 @@ +{ + "batch_size": {"_type":"choice", "_value": [16, 32, 64, 128]}, + "hidden_size":{"_type":"choice","_value":[128, 256, 512, 1024]}, + "lr":{"_type":"choice","_value":[0.0001, 0.001, 0.01, 0.1]}, + "momentum":{"_type":"uniform","_value":[0, 1]} +} diff --git a/tools/nni_cmd/tests/config_files/valid/test.yml b/tools/nni_cmd/tests/config_files/valid/test.yml new file mode 100644 index 0000000000..8565cd5f59 --- /dev/null +++ b/tools/nni_cmd/tests/config_files/valid/test.yml @@ -0,0 +1,22 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +tuner: + builtinTunerName: Random +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ./ + command: python3 main.py + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff 
--git a/tools/nni_cmd/tests/test_config_validation.py b/tools/nni_cmd/tests/test_config_validation.py new file mode 100644 index 0000000000..2ce448b625 --- /dev/null +++ b/tools/nni_cmd/tests/test_config_validation.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import glob +from unittest import TestCase, main +from schema import SchemaError +from nni_cmd.launcher_utils import validate_all_content +from nni_cmd.nnictl_utils import get_yml_content +from nni_cmd.common_utils import print_error, print_green + +class ConfigValidationTestCase(TestCase): + def test_valid_config(self): + file_names = glob.glob('./config_files/valid/*.yml') + for fn in file_names: + experiment_config = get_yml_content(fn) + validate_all_content(experiment_config, fn) + print_green('config file:', fn, 'validation success!') + + def test_invalid_config(self): + file_names = glob.glob('./config_files/invalid/*.yml') + for fn in file_names: + experiment_config = get_yml_content(fn) + try: + validate_all_content(experiment_config, fn) + print_error('config file:', fn,'Schema error should be raised for invalid config file!') + assert False + except SchemaError as e: + print_green('config file:', fn, 'Expected error catched:', e) + +if __name__ == '__main__': + main() From 3d4f122a8991158e5a023b4f3b0bb948e3b5abc8 Mon Sep 17 00:00:00 2001 From: Lijiaoa <61399850+Lijiaoa@users.noreply.github.com> Date: Wed, 24 Jun 2020 11:28:11 +0800 Subject: [PATCH 12/17] fix metric style (#2571) --- src/webui/src/components/trial-detail/TableList.tsx | 10 +++++++--- src/webui/src/static/style/table.scss | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/webui/src/components/trial-detail/TableList.tsx b/src/webui/src/components/trial-detail/TableList.tsx index f92365d92a..d8648f15ed 100644 --- a/src/webui/src/components/trial-detail/TableList.tsx +++ b/src/webui/src/components/trial-detail/TableList.tsx @@ -3,7 +3,7 @@ import axios from 
'axios'; import ReactEcharts from 'echarts-for-react'; import { Stack, Dropdown, DetailsList, IDetailsListProps, DetailsListLayoutMode, - PrimaryButton, Modal, IDropdownOption, IColumn, Selection, SelectionMode, IconButton + PrimaryButton, Modal, IDropdownOption, IColumn, Selection, SelectionMode, IconButton, TooltipHost } from 'office-ui-fabric-react'; import { LineChart, blocked, copy } from '../Buttons/Icon'; import { MANAGER_IP, COLUMNPro } from '../../static/const'; @@ -149,7 +149,9 @@ class TableList extends React.Component { isResizable: true, data: 'number', onColumnClick: this.onColumnClick, - onRender: (item): React.ReactNode =>
{item.formattedLatestAccuracy}
+ onRender: (item): React.ReactNode => +
{item.formattedLatestAccuracy}
+
}; SequenceIdColumnConfig: any = { @@ -556,7 +558,9 @@ class TableList extends React.Component { other = accDictionary[item].toString(); } return ( -
{other}
+ +
{other}
+
); } }); diff --git a/src/webui/src/static/style/table.scss b/src/webui/src/static/style/table.scss index 4838fb1ed7..d8f57dd424 100644 --- a/src/webui/src/static/style/table.scss +++ b/src/webui/src/static/style/table.scss @@ -11,6 +11,12 @@ } } +.ellipsis{ + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + #succeTable, #tableList{ .commonTableStyle .leftTitle div{ text-align: left; From e6817d22d4063550a9ad43f447169cd07b53a777 Mon Sep 17 00:00:00 2001 From: Ningxin Zheng <49771382+zheng-ningxin@users.noreply.github.com> Date: Wed, 24 Jun 2020 11:39:18 +0800 Subject: [PATCH 13/17] Support the Resnet/Squeezenet/Mobilenet for speedup (#2579) --- docs/en_US/Compressor/CompressionReference.md | 12 +- docs/en_US/Compressor/CompressionUtils.md | 6 +- src/sdk/pynni/nni/_graph_utils.py | 78 +++++- .../torch/speedup/compress_modules.py | 43 ++- .../compression/torch/speedup/compressor.py | 31 ++- .../compression/torch/speedup/infer_shape.py | 259 +++++++++++++++++- .../compression/torch/utils/mask_conflict.py | 248 +++++++++++++++-- .../torch/utils/shape_dependency.py | 232 +++++++++++++++- src/sdk/pynni/tests/test_compression_utils.py | 17 +- src/sdk/pynni/tests/test_model_speedup.py | 68 ++++- 10 files changed, 910 insertions(+), 84 deletions(-) diff --git a/docs/en_US/Compressor/CompressionReference.md b/docs/en_US/Compressor/CompressionReference.md index c190a46eb6..8b2444aa91 100644 --- a/docs/en_US/Compressor/CompressionReference.md +++ b/docs/en_US/Compressor/CompressionReference.md @@ -18,6 +18,16 @@ .. autoclass:: nni.compression.torch.utils.shape_dependency.ChannelDependency :members: -.. autoclass:: nni.compression.torch.utils.mask_conflict.MaskConflict +.. autoclass:: nni.compression.torch.utils.shape_dependency.GroupDependency :members: + +.. autoclass:: nni.compression.torch.utils.mask_conflict.CatMaskPadding + :members: + +.. autoclass:: nni.compression.torch.utils.mask_conflict.GroupMaskConflict + :members: + +.. 
autoclass:: nni.compression.torch.utils.mask_conflict.ChannelMaskConflict + :members: + ``` diff --git a/docs/en_US/Compressor/CompressionUtils.md b/docs/en_US/Compressor/CompressionUtils.md index 09418912b9..066225c730 100644 --- a/docs/en_US/Compressor/CompressionUtils.md +++ b/docs/en_US/Compressor/CompressionUtils.md @@ -116,8 +116,6 @@ Set 12,layer4.1.conv1 When the masks of different layers in a model have conflict (for example, assigning different sparsities for the layers that have channel dependency), we can fix the mask conflict by MaskConflict. Specifically, the MaskConflict loads the masks exported by the pruners(L1FilterPruner, etc), and check if there is mask conflict, if so, MaskConflict sets the conflicting masks to the same value. ``` -from nni.compression.torch.utils.mask_conflict import MaskConflict -mc = MaskConflict('./resnet18_mask', net, data) -mc.fix_mask_conflict() -mc.export('./resnet18_fixed_mask') +from nni.compression.torch.utils.mask_conflict import fix_mask_conflict +fixed_mask = fix_mask_conflict('./resnet18_mask', net, data) ``` \ No newline at end of file diff --git a/src/sdk/pynni/nni/_graph_utils.py b/src/sdk/pynni/nni/_graph_utils.py index 445aaebd58..5fec566b46 100644 --- a/src/sdk/pynni/nni/_graph_utils.py +++ b/src/sdk/pynni/nni/_graph_utils.py @@ -10,6 +10,7 @@ from torch.utils.tensorboard._pytorch_graph import NodePy, NodePyIO, NodePyOP, GraphPy CLASSTYPE_KIND = 'ClassType' GETATTR_KIND = 'prim::GetAttr' +CAT_KIND = 'aten::cat' _logger = logging.getLogger(__name__) @@ -236,6 +237,7 @@ def __init__(self, model=None, dummy_input=None, traced_model=None): super().__init__(model, dummy_input, traced_model) self.global_count = 0 self.name_to_node, self.input_to_node, self.output_to_node = self._build_graph() + self._extract_auxiliary_info() def _expand_non_prim_node(self, node, nodes, input_to_node, output_to_node, module_type): @@ -364,6 +366,58 @@ def _expand_module_node(self, node, node_name, unique_name, op_type, nodes, 
node_group, inputs=inputs, outputs=outputs) return nodepy + def _extract_cat_info(self, node_group, cpp_node): + """ + Extract the detail information of the cat operation, + such the order of the input tensor, the shape of each + input tensor, the output shape, and the cat dimension. + + Parameters + ---------- + node_group : NodePyGroup + cpp_node: torch._C.Node + It should be ```aten::cat``` node + + Returns + ------- + dict + Include auxiliary information for the cat operation. + This dict objec has four keys: 'cat_dim', 'out_shape', + 'in_order' and 'in_shape'. cat_dim is the dimension of + the cat operation to concat the input tensors. out_shape + is the shape of the output tensor of the cat operation. + in_order is an ordered list which contains the corresponding + parent operaion nodes of the input tensors. in_shape is also + an ordered list that contains the input shapes of the input + tensor. + """ + # only suport the cat operation + assert cpp_node.kind() == CAT_KIND + cat_info = {} + # get the shape of the output tensor + t_output = cpp_node.output() + out_shape = t_output.type().sizes() + cat_info['out_shape'] = out_shape + # get the cat dimension + inputs = cpp_node.inputs() + cat_dim = list(inputs)[1].toIValue() + cat_info['cat_dim'] = cat_dim + # get the order of the input tensors + # To get the order of the input tensors, we need + # to be aware of the topology of the model, which + # means we should extract the auxiliary information + # after the build_index function. 
+ input_order = [] + list_construct_cpp = list(cpp_node.inputs())[0].node() + input_tensors = list(list_construct_cpp.inputs()) + for _tensor in input_tensors: + debug_name = _tensor.debugName() + input_order.append(self.output_to_node[debug_name].unique_name) + cat_info['in_order'] = input_order + input_shapes = [t.type().sizes() for t in input_tensors] + cat_info['in_shape'] = input_shapes + return cat_info + def _extract_shape_info(self, node): """ Extract the shape information of ```aten::view``` node @@ -541,8 +595,8 @@ def _build_graph(self): node, nodes, input_to_node, output_to_node, 'func') nodes_py.nodes_op.append(node_group) # get shape infor for view (aten::view) func - if node_group.op_type in ['aten::view', 'aten::flatten']: - node_group.auxiliary = self._extract_shape_info(node) + # if node_group.op_type in ['aten::view', 'aten::flatten']: + # node_group.auxiliary = self._extract_shape_info(node) for node in graph.outputs(): # Create sink nodes for output ops node_py = NodePyIO(node, 'output') @@ -552,6 +606,26 @@ def _build_graph(self): # build index return self._build_index(self.nodes_py.nodes_op) + def _extract_auxiliary_info(self): + """ + Extract the auxiliary information for the nodegroups + if necessary. For example, view/flatten operations may + need the shape of the input tensor and output tensor. 
+ """ + # extract the input & output shape for the view and flatten + for node_group in self.nodes_py.nodes_op: + if node_group.op_type in ['aten::view', 'aten::flatten', 'aten::mean', 'aten::reshape']: + # get shape infor for view (aten::view) func + cpp_node = list(filter(lambda x: x.kind() == node_group.op_type, + node_group.node_cpps))[0] + node_group.auxiliary = self._extract_shape_info(cpp_node) + elif node_group.op_type == CAT_KIND: + # get the detail information for cat func + cpp_node = list(filter(lambda x: x.kind() == node_group.op_type, + node_group.node_cpps))[0] + node_group.auxiliary = self._extract_cat_info( + node_group, cpp_node) + def find_predecessors(self, unique_name): """ Find predecessor node of the given node diff --git a/src/sdk/pynni/nni/compression/torch/speedup/compress_modules.py b/src/sdk/pynni/nni/compression/torch/speedup/compress_modules.py index 666c497482..0b349f9d5c 100644 --- a/src/sdk/pynni/nni/compression/torch/speedup/compress_modules.py +++ b/src/sdk/pynni/nni/compression/torch/speedup/compress_modules.py @@ -14,7 +14,11 @@ 'AvgPool2d': lambda module, mask: no_replace(module, mask), 'AdaptiveAvgPool2d': lambda module, mask: no_replace(module, mask), 'ReLU': lambda module, mask: no_replace(module, mask), - 'Linear': lambda module, mask: replace_linear(module, mask) + 'ReLU6': lambda module, mask: no_replace(module, mask), + 'Linear': lambda module, mask: replace_linear(module, mask), + 'Dropout': lambda module, mask: no_replace(module, mask), + 'Dropout2d': lambda module, mask: no_replace(module, mask), + 'Dropout3d': lambda module, mask: no_replace(module, mask) } def no_replace(module, mask): @@ -111,6 +115,7 @@ def replace_conv2d(conv, mask): else: out_channels_index = mask.output_mask.mask_index[1] out_channels = out_channels_index.size()[0] + _logger.debug("replace conv2d with in_channels: %d, out_channels: %d", in_channels, out_channels) new_conv = torch.nn.Conv2d(in_channels=in_channels, out_channels=out_channels, @@ 
-118,21 +123,45 @@ def replace_conv2d(conv, mask): stride=conv.stride, padding=conv.padding, dilation=conv.dilation, - groups=1, # currently only support groups is 1 + groups=conv.groups, bias=conv.bias is not None, padding_mode=conv.padding_mode) + new_conv.to(conv.weight.device) tmp_weight_data = tmp_bias_data = None + if mask.output_mask is not None: tmp_weight_data = torch.index_select(conv.weight.data, 0, out_channels_index) if conv.bias is not None: tmp_bias_data = torch.index_select(conv.bias.data, 0, out_channels_index) - # NOTE: does not support group + else: + tmp_weight_data = conv.weight.data + # For the convolutional layers that have more than one group + # we need to copy the weight group by group, because the input + # channal is also divided into serveral groups and each group + # filter may have different input channel indexes. + input_step = int(conv.in_channels / conv.groups) + in_channels_group = int(in_channels / conv.groups) + filter_step = int(out_channels / conv.groups) if mask.input_mask is not None: - tmp_weight_data = torch.index_select(conv.weight.data if tmp_weight_data is None else tmp_weight_data, - 1, in_channels_index) - assert tmp_weight_data is not None, "Conv2d weight should be updated based on masks" - new_conv.weight.data.copy_(tmp_weight_data) + for groupid in range(conv.groups): + start = groupid * input_step + end = (groupid + 1) * input_step + current_input_index = list(filter(lambda x: start <= x and x < end, in_channels_index.tolist())) + # shift the global index into the group index + current_input_index = [x-start for x in current_input_index] + # if the groups is larger than 1, the input channels of each + # group should be pruned evenly. 
+ assert len(current_input_index) == in_channels_group, \ + 'Input channels of each group are not pruned evenly' + current_input_index = torch.tensor(current_input_index).to(tmp_weight_data.device) # pylint: disable=not-callable + f_start = groupid * filter_step + f_end = (groupid + 1) * filter_step + new_conv.weight.data[f_start:f_end] = torch.index_select(tmp_weight_data[f_start:f_end], 1, current_input_index) + else: + new_conv.weight.data.copy_(tmp_weight_data) + if conv.bias is not None: new_conv.bias.data.copy_(conv.bias.data if tmp_bias_data is None else tmp_bias_data) + return new_conv diff --git a/src/sdk/pynni/nni/compression/torch/speedup/compressor.py b/src/sdk/pynni/nni/compression/torch/speedup/compressor.py index 084d5b8ea4..4b569d7e4f 100644 --- a/src/sdk/pynni/nni/compression/torch/speedup/compressor.py +++ b/src/sdk/pynni/nni/compression/torch/speedup/compressor.py @@ -4,6 +4,7 @@ import logging import torch from nni._graph_utils import build_module_graph +from nni.compression.torch.utils.mask_conflict import fix_mask_conflict from .compress_modules import replace_module from .infer_shape import ModuleMasks, infer_from_mask, infer_from_inshape, infer_from_outshape @@ -53,9 +54,10 @@ def __init__(self, model, dummy_input, masks_file, map_location=None): self.bound_model = model self.masks = torch.load(masks_file, map_location) self.inferred_masks = dict() # key: module_name, value: ModuleMasks + self.dummy_input = dummy_input self.torch_graph = build_module_graph(model, dummy_input) - def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=None): + def infer_module_mask(self, module_name, last_module, mask=None, in_shape=None, out_shape=None): """ Infer input shape / output shape based on the module's weight mask / input shape / output shape. 
@@ -71,6 +73,8 @@ def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=Non ---------- module_name : str The name of the node + last_module : str + The name of last visited node mask : tensor of mask or ModuleMasks Mask of the weights in this node (i.e., module) in_shape : ModuleMasks @@ -100,10 +104,17 @@ def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=Non raise RuntimeError( "Has not supported infering output shape from input shape for module/function: `{}`, {}" .format(m_type, module_name)) - if m_type in ['aten::view', 'aten::flatten']: + if m_type in ['aten::view', 'aten::flatten', 'aten::mean', 'aten::reshape']: output_cmask = infer_from_inshape[m_type](module_masks, in_shape, self.torch_graph.name_to_node[module_name].auxiliary) + elif m_type in ['aten::cat']: + # To calculate the mask for concat operation, the output shape + # , cat dimension, and the order of the input parameters. + output_cmask = infer_from_inshape[m_type](module_masks, + in_shape, + self.torch_graph.name_to_node[module_name].auxiliary, + last_module) else: output_cmask = infer_from_inshape[m_type](module_masks, in_shape) if out_shape is not None: @@ -117,18 +128,19 @@ def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=Non if input_cmask: predecessors = self.torch_graph.find_predecessors(module_name) for _module_name in predecessors: - self.infer_module_mask(_module_name, out_shape=input_cmask) + self.infer_module_mask(_module_name, module_name, out_shape=input_cmask) if output_cmask: successors = self.torch_graph.find_successors(module_name) for _module_name in successors: - self.infer_module_mask(_module_name, in_shape=output_cmask) + self.infer_module_mask(_module_name, module_name, in_shape=output_cmask) def infer_modules_masks(self): """ Do shape inference of involved modules, including the shape of weights, inputs, output """ for module_name, mask in self.masks.items(): - 
self.infer_module_mask(module_name, mask=mask) + _logger.debug('Start mask inference from %s', module_name) + self.infer_module_mask(module_name, None, mask=mask) def replace_compressed_modules(self): """ @@ -144,19 +156,20 @@ def replace_compressed_modules(self): _logger.debug("replace %s, in %s type, with op_type %s", module_name, g_node.type, g_node.op_type) if g_node.type == 'module': - super_module, leaf_module = get_module_by_name(self.bound_model, module_name) + super_module, leaf_module = get_module_by_name(self.bound_model, g_node.name) m_type = g_node.op_type if not m_type in replace_module: raise RuntimeError("Has not supported replacing the module: `{}`".format(m_type)) - _logger.info("replace module (name: %s, op_type: %s)", module_name, m_type) + _logger.info("replace module (name: %s, op_type: %s)", g_node.name, m_type) compressed_module = replace_module[m_type](leaf_module, self.inferred_masks[module_name]) - setattr(super_module, module_name.split('.')[-1], compressed_module) + setattr(super_module, g_node.name.split('.')[-1], compressed_module) elif g_node.type == 'func': _logger.info("Warning: cannot replace (name: %s, op_type: %s) which is func type", module_name, g_node.op_type) else: raise RuntimeError("Unsupported node type: {}".format(g_node.type)) + def speedup_model(self): """ There are basically two steps: @@ -165,6 +178,8 @@ def speedup_model(self): """ training = self.bound_model.training _logger.info("start to speed up the model") + _logger.info("fix the mask conflict of the interdependent layers") + fix_mask_conflict(self.masks, self.bound_model, self.dummy_input) _logger.info("infer module masks...") self.infer_modules_masks() _logger.info("replace compressed modules...") diff --git a/src/sdk/pynni/nni/compression/torch/speedup/infer_shape.py b/src/sdk/pynni/nni/compression/torch/speedup/infer_shape.py index 82401659ec..964c5046ec 100644 --- a/src/sdk/pynni/nni/compression/torch/speedup/infer_shape.py +++ 
b/src/sdk/pynni/nni/compression/torch/speedup/infer_shape.py @@ -8,11 +8,13 @@ import torch + class CoarseMask: """ Coarse grained mask for a given tensor, here tensor could be weights, input tensor, or output tensor """ + def __init__(self, num_dim): """ Parameters @@ -50,13 +52,26 @@ def merge_index(index_a, index_b): ------- tensor The merged index (1-dimension) tensor + Note that: the output tensor will be moved + to the same device as index_a. """ + device = index_a.device s = set() - for num in index_a: + for num in index_a.tolist(): + # we need to transfer the tensor to list here + # first, directly traversing the tensor by for + # loop will return the list of tensor(x) object, + # even the value are the same, but they are different + # tensor objects, so the set will contains multiple + # tensor objects that has the same value. For example + # for num in torch.ones(2): + # s.add(num) + # s will be {tensor(1), tensor(1)} s.add(num) - for num in index_b: + for num in index_b.tolist(): s.add(num) - return torch.tensor(sorted(s)) # pylint: disable=not-callable + # move the output tensor to the same device with index_a + return torch.tensor(sorted(s)).to(device) # pylint: disable=not-callable def merge(self, cmask): """ @@ -86,10 +101,65 @@ def merge(self, cmask): def __repr__(self): return 'mask_index: {}'.format(self.mask_index) + def eq_on_dim(self, other, dim): + assert isinstance(other, CoarseMask) + if self.mask_index[dim] is None and other.mask_index[dim] is None: + return True + elif isinstance(self.mask_index[dim], torch.Tensor) \ + and isinstance(other.mask_index[dim], torch.Tensor): + return torch.equal(self.mask_index[dim], other.mask_index[dim]) + else: + return False + + def __eq__(self, other): + assert isinstance(other, CoarseMask) + if len(self.mask_index) != len(other.mask_index): + return False + for i in range(len(self.mask_index)): + if not self.eq_on_dim(other, i): + return False + return True + + def __lt__(self, other): + """ + Judge if 
the mask is a subset of another CoarseMask. + """ + assert isinstance(other, CoarseMask) + for dim, _ in enumerate(self.mask_index): + # if self has more dimensions + if dim >= len(other.mask_index): + return False + if self.mask_index[dim] is None: + # if no mask on this dimension, then we have less + # masks then the other CoraseMask. + continue + elif other.mask_index[dim] is None: + return False + else: + s1 = set(self.mask_index[dim].tolist()) + s2 = set(other.mask_index[dim].tolist()) + if not s1 < s2: + return False + return True + + def __le__(self, other): + """ + Return if self's mask is less or equal to other's mask. + """ + assert isinstance(other, CoarseMask) + if self.__lt__(other) or self.__eq__(other): + return True + return False + + def __ne__(self, other): + return not self.__eq__(other) + + class ModuleMasks: """ The masks of a module, including the masks for weights, inputs, output """ + def __init__(self, module_name): """ Parameters @@ -136,6 +206,7 @@ def __repr__(self): self.input_mask, self.output_mask, self.param_masks ) + """ Infer input and output shape of a module/function from its weight mask """ @@ -149,18 +220,27 @@ def __repr__(self): """ infer_from_inshape = { 'ReLU': lambda module_masks, mask: relu_inshape(module_masks, mask), + 'ReLU6': lambda module_masks, mask: relu_inshape(module_masks, mask), 'aten::relu': lambda module_masks, mask: relu_inshape(module_masks, mask), 'Conv2d': lambda module_masks, mask: conv2d_inshape(module_masks, mask), 'MaxPool2d': lambda module_masks, mask: maxpool2d_inshape(module_masks, mask), 'aten::max_pool2d': lambda module_masks, mask: maxpool2d_inshape(module_masks, mask), 'aten::avg_pool2d': lambda module_masks, mask: maxpool2d_inshape(module_masks, mask), + 'aten::adaptive_avg_pool2d': lambda module_masks, mask: maxpool2d_inshape(module_masks, mask), 'AvgPool2d': lambda module_masks, mask: maxpool2d_inshape(module_masks, mask), 'AdaptiveAvgPool2d': lambda module_masks, mask: 
maxpool2d_inshape(module_masks, mask), 'aten::size': lambda module_masks, mask: size_inshape(module_masks, mask), 'aten::view': lambda module_masks, mask, shape: view_inshape(module_masks, mask, shape), - 'aten::flatten': lambda module_masks, mask, shape: view_inshape(module_masks, mask, shape), # support only start_dim=1 + 'aten::reshape': lambda module_masks, mask, shape: view_inshape(module_masks, mask, shape), + # support only start_dim=1 + 'aten::flatten': lambda module_masks, mask, shape: view_inshape(module_masks, mask, shape), 'Linear': lambda module_masks, mask: linear_inshape(module_masks, mask), - 'BatchNorm2d': lambda module_masks, mask: batchnorm2d_inshape(module_masks, mask) + 'BatchNorm2d': lambda module_masks, mask: batchnorm2d_inshape(module_masks, mask), + 'aten::add_': lambda module_masks, mask: add_inshape(module_masks, mask), + 'aten::add': lambda module_mask, mask: add_inshape(module_mask, mask), + 'aten::cat': lambda module_mask, mask, cat_info, last_visited: cat_inshape(module_mask, mask, cat_info, last_visited), + 'aten::mean': lambda module_masks, mask, shape: mean_inshape(module_masks, mask, shape), + 'Dropout': lambda module_masks, mask: dropout_inshape(module_masks, mask) } """ @@ -170,6 +250,120 @@ def __repr__(self): 'Conv2d': lambda module_masks, mask: conv2d_outshape(module_masks, mask) } +def dropout_inshape(module_masks, mask): + if module_masks.input_mask is None: + module_masks.set_input_mask(mask) + module_masks.set_output_mask(mask) + return module_masks.output_mask + # if alreay visited + assert module_masks.input_mask <= mask + if module_masks.input_mask == mask: + return None + module_masks.set_input_mask(mask) + module_masks.set_output_mask(mask) + return module_masks.output_mask + + + +def cat_inshape(module_masks, mask, cat_info, last_visited): + """ + Inference the output mask of the cat operation from the + input mask. 
+ + Parameters + ---------- + module_masks : ModuleMasks + The ModuleMasks instance of the batchnorm2d + mask : CoarseMask + The mask of its input tensor + cat_info: dict + Dict object that records the necessary information + of cat operation, such as the order of the input + tensors. + last_visited: str + The unique_name of the last visited node group. + + Returns + ------- + CoarseMask + The mask of its output tensor + + """ + assert isinstance(mask, CoarseMask) + out_shape = cat_info['out_shape'] + cat_dim = cat_info['cat_dim'] + in_order = cat_info['in_order'] + in_shape = cat_info['in_shape'] + if module_masks.output_mask is None: + # First visit to this cat node + # initialize the mask based on + # the number of the output channel. + output_mask = CoarseMask(num_dim=len(out_shape)) + for dim, _ in enumerate(out_shape): + if dim == cat_dim: + if mask.mask_index[dim] is None: + continue + device = mask.mask_index[dim].device + # calculate the offset of the mask + pos = in_order.index(last_visited) + offsets = [in_shape[i][cat_dim] + for i, _ in enumerate(in_shape)] + offset = 0 + for i in range(pos): + offset += offsets[i] + _tmp_mask = (mask.mask_index[dim] + offset).to(device) + output_mask.mask_index[dim] = _tmp_mask + else: + # directly copy the mask + if mask.mask_index[dim] is not None: + output_mask.mask_index[dim] = mask.mask_index[dim].data.clone( + ) + module_masks.set_output_mask(output_mask) + + return module_masks.output_mask + # If this cat node is already visited, we need + # validating if the mask is legel, for cat operation, + # the mask on the 'cat_dim' dimension should be stitched + # together. In the other dimensions, the mask should be + # the same, else the mask is not legal. 
+ for dim, _ in enumerate(out_shape): + if dim == cat_dim: + if mask.mask_index[dim] is None: + continue + pos = in_order.index(last_visited) + offsets = [in_shape[i][cat_dim] for i, _ in enumerate(in_shape)] + offset = 0 + for i in range(pos): + offset += offsets[i] + device = mask.mask_index[dim].device + new_mask = mask.mask_index[dim] + offset + module_masks.output_mask.mask_index[dim] = CoarseMask.merge_index( + module_masks.output_mask.mask_index[dim], new_mask).to(device) + else: + assert module_masks.output_mask.eq_on_dim(mask, dim) + + return module_masks.output_mask + + +def add_inshape(module_masks, mask): + """ + Inference the output mask of the add operation from the + input mask. + """ + assert isinstance(mask, CoarseMask) + if module_masks.input_mask is None: + module_masks.set_input_mask(mask) + module_masks.set_output_mask(mask) + # module_masks.input_mask = mask + return mask + # If alreay visited, validate if have the conflict + # if the mask is different with previous input_mask + # then there is a mask confilct. 
+ if mask != module_masks.input_mask: + raise Exception('Mask conflict happenes!') + return None + + def batchnorm2d_inshape(module_masks, mask): """ We assume only the second dimension has coarse grained mask @@ -199,6 +393,7 @@ def batchnorm2d_inshape(module_masks, mask): module_masks.set_param_masks('bias', weight_cmask) return mask + def linear_inshape(module_masks, mask): """ Coarse grained input mask does not change the shape of weights and output tensor @@ -221,6 +416,7 @@ def linear_inshape(module_masks, mask): module_masks.set_input_mask(mask) return None + def view_inshape(module_masks, mask, shape): """ This is a limited support @@ -246,7 +442,8 @@ def view_inshape(module_masks, mask, shape): assert shape['in_shape'][0] == shape['out_shape'][0] assert len(shape['in_shape']) == 4 assert len(shape['out_shape']) == 2 - assert shape['out_shape'][1] == shape['in_shape'][1]*shape['in_shape'][2]*shape['in_shape'][3] + assert shape['out_shape'][1] == shape['in_shape'][1] * \ + shape['in_shape'][2]*shape['in_shape'][3] assert isinstance(mask, CoarseMask) assert mask.mask_index[1] is not None @@ -260,7 +457,7 @@ def view_inshape(module_masks, mask, shape): step_size = shape['in_shape'][2] * shape['in_shape'][3] for loc in mask.mask_index[1]: index.extend([loc * step_size + i for i in range(step_size)]) - output_cmask.add_index_mask(dim=1, index=torch.tensor(index)) # pylint: disable=not-callable + output_cmask.add_index_mask(dim=1, index=torch.tensor(index)) # pylint: disable=not-callable module_masks.set_output_mask(output_cmask) return output_cmask @@ -271,6 +468,28 @@ def size_inshape(module_masks, mask): """ return None +def mean_inshape(module_masks, mask, shape): + """ + Similar to view operation, currently mask inference only supports + the mean operation on the 3rd and 4th dimensions. 
+ """ + assert shape['in_shape'][0] == shape['out_shape'][0] + assert shape['out_shape'][1] == shape['in_shape'][1] + assert len(shape['in_shape']) == 4 + assert len(shape['out_shape']) == 2 + + assert isinstance(mask, CoarseMask) + assert mask.mask_index[1] is not None + assert mask.mask_index[0] is None + assert mask.mask_index[2] is None + assert mask.mask_index[3] is None + module_masks.set_input_mask(mask) + + output_cmask = CoarseMask(num_dim=2) + output_cmask.add_index_mask(dim=1, index=mask.mask_index[1]) + module_masks.set_output_mask(output_cmask) + return output_cmask + def maxpool2d_inshape(module_masks, mask): """ Assume only the second dimension is masked @@ -292,11 +511,14 @@ def maxpool2d_inshape(module_masks, mask): assert mask.mask_index[0] is None assert mask.mask_index[2] is None assert mask.mask_index[3] is None - assert module_masks.input_mask is None + if module_masks.input_mask is not None: + assert module_masks.input_mask <= mask + # assert module_masks.input_mask is None module_masks.set_input_mask(mask) module_masks.set_output_mask(mask) return mask + def relu_inshape(module_masks, mask): """ Parameters @@ -313,11 +535,17 @@ def relu_inshape(module_masks, mask): """ assert isinstance(mask, CoarseMask) # TODO: double check this assert, is it possible that a module is passed twice - assert module_masks.input_mask is None, "A relu op can only be processed once" + if module_masks.input_mask is not None: + # check if has a mask conflict + assert module_masks.input_mask == mask + # No need to pass the mask again + return None + # assert module_masks.input_mask is None, "A relu op can only be processed once" module_masks.set_input_mask(mask) module_masks.set_output_mask(mask) return mask + def batchnorm2d_mask(module_masks, mask): """ Infer input and output shape from weight mask @@ -353,6 +581,7 @@ def batchnorm2d_mask(module_masks, mask): module_masks.set_output_mask(output_cmask) return input_cmask, output_cmask + def 
conv2d_mask(module_masks, mask): """ Infer input and output shape from weight mask @@ -429,6 +658,7 @@ def convert_to_coarse_mask(mask): module_masks.output_mask.merge(output_cmask) return None, module_masks.output_mask + def conv2d_inshape(module_masks, mask): """ Shape change of input tensor does not affect the shape of its output tensor @@ -446,10 +676,16 @@ def conv2d_inshape(module_masks, mask): The mask of its output tensor """ assert isinstance(mask, CoarseMask) - assert module_masks.input_mask is None - module_masks.set_input_mask(mask) + if module_masks.input_mask is None: + module_masks.set_input_mask(mask) + else: + # the same conv layer may be accessed more + # than once, such as a concat operation. + assert module_masks.input_mask <= mask + module_masks.input_mask.merge(mask) return None + def conv2d_outshape(module_masks, mask): """ Assume only the second dimension is masked @@ -487,4 +723,3 @@ def conv2d_outshape(module_masks, mask): module_masks.set_param_masks('bias', bias_cmask) # input shape is not changed return None - \ No newline at end of file diff --git a/src/sdk/pynni/nni/compression/torch/utils/mask_conflict.py b/src/sdk/pynni/nni/compression/torch/utils/mask_conflict.py index 626283d43d..28412b1d9f 100644 --- a/src/sdk/pynni/nni/compression/torch/utils/mask_conflict.py +++ b/src/sdk/pynni/nni/compression/torch/utils/mask_conflict.py @@ -1,51 +1,231 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
+import os import logging import torch import numpy as np -from .shape_dependency import ChannelDependency +from .shape_dependency import ChannelDependency, GroupDependency, CatPaddingDependency # logging.basicConfig(level = logging.DEBUG) _logger = logging.getLogger('FixMaskConflict') -class MaskConflict: - def __init__(self, mask_file, model=None, dummy_input=None, graph=None): +def fix_mask_conflict(masks, model=None, dummy_input=None, traced=None): + """ + MaskConflict fix the mask conflict for the channel dependencies + and group dependency. + + Parameters + ---------- + masks : dict/str + A dict object that stores the masks or the path of the mask file + model : torch.nn.Module + model to fix the mask conflict + dummy_input : torch.Tensor + input example to trace the model + traced : torch._C.torch.jit.TopLevelTracedModule + the traced model of the target model, is this parameter is not None, + we donnot use the model and dummpy_input to get the trace graph. + """ + if isinstance(masks, str): + # if the input is the path of the mask_file + assert os.path.exists(masks) + masks = torch.load(masks) + # if the user uses the model and dummy_input to trace the model, we + # should get the traced model handly, so that, we only trace the + # model once, GroupMaskConflict and ChannelMaskConflict will reuse + # this traced model. 
+ if traced is None: + assert model is not None and dummy_input is not None + with torch.onnx.set_training(model, False): + # We need to trace the model in this way, else it will have problems + traced = torch.jit.trace(model, dummy_input) + + fix_group_mask = GroupMaskConflict(masks, model, dummy_input, traced) + masks = fix_group_mask.fix_mask() + fix_channel_mask = ChannelMaskConflict(masks, model, dummy_input, traced) + masks = fix_channel_mask.fix_mask() + padding_cat_mask = CatMaskPadding(masks, model, dummy_input, traced) + masks = padding_cat_mask.fix_mask() + return masks + +class MaskFix: + def __init__(self, masks, model=None, dummy_input=None, traced=None): + # check if the parameters are valid + parameter_valid = False + if traced is not None: + parameter_valid = True + elif (model is not None) and (dummy_input is not None): + parameter_valid = True + if not parameter_valid: + raise Exception('The input parameters is invalid!') + self.model = model + self.dummy_input = dummy_input + self.traced = traced + self.masks = masks + + def fix_mask(self): + raise NotImplementedError + + def export(self, path): + """ + Export the masks after fixing the conflict to file. + """ + torch.save(self.masks, path) + +class CatMaskPadding(MaskFix): + def __init__(self, masks, model, dummy_input=None, traced=None): + """ + CatMaskPadding find the layers whose output tensor is passed + to the same cat operation. The cat operation concatnates the + masks of the input tensors as the output mask, so when some + of the input layers of the cat operation are not pruned, we still + need to pass the masks of these non-pruned layers(the mask are + all ones) to the cat operation to ensure the shape of the output + mask is right. 
+ + Parameters + ---------- + masks : dict + a dict object that stores the masks + model : torch.nn.Module + model to fix the mask conflict + dummy_input : torch.Tensor + input example to trace the model + traced : torch._C.torch.jit.TopLevelTracedModule + the traced model of the target model, is this parameter is not None, + we donnot use the model and dummpy_input to get the trace graph. + """ + super(CatMaskPadding, self).__init__(masks, model, dummy_input, traced) + + def fix_mask(self): + cat_padding_depen = CatPaddingDependency(self.model, self.dummy_input, self.traced) + name_to_module = {} + for name, module in self.model.named_modules(): + name_to_module[name] = module + depen = cat_padding_depen.dependency_sets + for layers in depen: + device = None + count = 0 + for layer in layers: + if layer in self.masks: + count += 1 + if device is None: + device = self.masks[layer]['weight'].device + if count == 0: + # no layer is pruned + continue + elif count == len(layers): + # all the layers have been pruned + continue + # pad the mask for the non-pruned layers + for layer in layers: + module = name_to_module[layer] + w_shape = module.weight.data.size() + w_mask = torch.ones(w_shape).to(device) + b_mask = None + if hasattr(module, 'bias'): + b_shape = module.bias.data.size() + b_mask = torch.ones(b_shape).to(device) + self.masks[layer] = {'weight':w_mask, 'bias':b_mask} + return self.masks + + + +class GroupMaskConflict(MaskFix): + def __init__(self, masks, model=None, dummy_input=None, traced=None): + """ + GroupMaskConflict fix the mask conflict between the layers that + has group dependecy with each other. 
+ + Parameters + ---------- + masks : dict + a dict object that stores the masks + model : torch.nn.Module + model to fix the mask conflict + dummy_input : torch.Tensor + input example to trace the model + traced : torch._C.torch.jit.TopLevelTracedModule + the traced model of the target model, is this parameter is not None, + we donnot use the model and dummpy_input to get the trace graph. + """ + super(GroupMaskConflict, self).__init__(masks, model, dummy_input, traced) + + + def fix_mask(self): + """ + Fix the mask conflict before the mask inference for the layers that + has group dependencies. This function should be called before the + mask inference of the 'speedup' module. + """ + group_depen = GroupDependency(self.model, self.dummy_input, self.traced) + depens = group_depen.dependency + _logger.info(depens) + for layername in depens: + group = depens[layername] + if layername not in self.masks: + # this layer not pruned + continue + w_mask = self.masks[layername]['weight'] + shape = w_mask.size() + count = np.prod(shape[1:]) + all_ones = (w_mask.flatten(1).sum(-1) == count).nonzero().squeeze(1).tolist() + all_zeros = (w_mask.flatten(1).sum(-1) == 0).nonzero().squeeze(1).tolist() + if len(all_ones) + len(all_zeros) < w_mask.size(0): + # In fine-grained pruning, skip this layer + _logger.info('Layers %s using fine-grained pruning', layername) + continue + assert shape[0] % group == 0 + # Find the number of masked filter for each group (mini_masked). + # Because we have to keep the pruned filter can still + # be divided into the same number of groups, so we only can + # prune mini_masked filters for each group. 
+ step = shape[0] / group + group_masked = [] + for i in range(group): + _start = step * i + _end = step * (i+1) + _tmp_list = list(filter(lambda x: _start <= x and x < _end, all_zeros)) + group_masked.append(_tmp_list) + mini_masked = min([len(x) for x in group_masked]) + for gm in group_masked: + for i in range(mini_masked, len(gm)): + # To keep the output channel number still being divisible to + # groups, we set the masks of following filters to be zero. + pos = gm[i] + self.masks[layername]['weight'][pos] = torch.ones(shape[1:]) + if hasattr(self.masks[layername], 'bias'): + self.masks[layername]['bias'][pos] = 1 + return self.masks + + + +class ChannelMaskConflict(MaskFix): + def __init__(self, masks, model=None, dummy_input=None, traced=None): """ - MaskConflict fix the mask conflict between the layers that + ChannelMaskConflict fix the mask conflict between the layers that has channel dependecy with each other. Parameters ---------- + masks : dict + a dict object that stores the masks model : torch.nn.Module model to fix the mask conflict dummy_input : torch.Tensor input example to trace the model - mask_file : str - the path of the original mask file - graph : torch._C.Graph + graph : torch._C.torch.jit.TopLevelTracedModule the traced graph of the target model, is this parameter is not None, we donnot use the model and dummpy_input to get the trace graph. 
""" - # check if the parameters are valid - parameter_valid = False - if graph is not None: - parameter_valid = True - elif (model is not None) and (dummy_input is not None): - parameter_valid = True - if not parameter_valid: - raise Exception('The input parameters is invalid!') - self.model = model - self.dummy_input = dummy_input - self.graph = graph - self.mask_file = mask_file - self.masks = torch.load(self.mask_file) + super(ChannelMaskConflict, self).__init__(masks, model, dummy_input, traced) - def fix_mask_conflict(self): + def fix_mask(self): """ Fix the mask conflict before the mask inference for the layers that has shape dependencies. This function should be called before the mask inference of the 'speedup' module. """ - channel_depen = ChannelDependency(self.model, self.dummy_input, self.graph) + channel_depen = ChannelDependency(self.model, self.dummy_input, self.traced) depen_sets = channel_depen.dependency_sets for dset in depen_sets: if len(dset) == 1: @@ -53,11 +233,18 @@ def fix_mask_conflict(self): continue channel_remain = set() fine_grained = False + out_channels = None + # A flag that represents if all the layers in + # the dependency set are pruned + all_pruned = True for name in dset: if name not in self.masks: # this layer is not pruned + all_pruned = False continue w_mask = self.masks[name]['weight'] + if out_channels is None: + out_channels = w_mask.size(0) shape = w_mask.size() count = np.prod(shape[1:]) all_ones = (w_mask.flatten(1).sum(-1) == count).nonzero().squeeze(1).tolist() @@ -74,8 +261,19 @@ def fix_mask_conflict(self): # Update the masks for the layers in the dependency set if fine_grained: continue + if not all_pruned: + # if some layer are not pruned at all + # then all the layers in this dependency set + # cannot be pruned due to the shape dependency. 
+ channel_remain.update(range(out_channels)) ori_channels = 0 for name in dset: + if name not in self.masks: + # this layer is not pruned at all + # in this case, all_pruned is False + # and the other layers in the same dset + # will not be pruned either. + continue mask = self.masks[name] w_shape = mask['weight'].size() ori_channels = w_shape[0] @@ -88,9 +286,3 @@ def fix_mask_conflict(self): pruned_filters = set(list(range(ori_channels)))-channel_remain _logger.info(str(sorted(pruned_filters))) return self.masks - - def export(self, path): - """ - Export the masks after fixing the conflict to file. - """ - torch.save(self.masks, path) diff --git a/src/sdk/pynni/nni/compression/torch/utils/shape_dependency.py b/src/sdk/pynni/nni/compression/torch/utils/shape_dependency.py index 8922ec483e..49aa32b7c9 100644 --- a/src/sdk/pynni/nni/compression/torch/utils/shape_dependency.py +++ b/src/sdk/pynni/nni/compression/torch/utils/shape_dependency.py @@ -6,6 +6,7 @@ from nni._graph_utils import TorchModuleGraph +__all__ = ['ChannelDependency', 'GroupDependency', 'CatPaddingDependency'] CONV_TYPE = 'aten::_convolution' ADD_TYPES = ['aten::add', 'aten::add_'] @@ -13,7 +14,27 @@ logger = logging.getLogger('Shape_Dependency') -class ChannelDependency: +class Dependency: + def __init__(self, model=None, dummy_input=None, traced_model=None): + """ + Build the graph for the model. 
+ """ + # check if the input is legal + if traced_model is None: + # user should provide model & dummy_input to trace + # the model or a already traced model + assert model is not None and dummy_input is not None + self.graph = TorchModuleGraph(model, dummy_input, traced_model) + self.dependency = dict() + self.build_dependency() + + def build_dependency(self): + raise NotImplementedError + + def export(self, filepath): + raise NotImplementedError + +class ChannelDependency(Dependency): def __init__(self, model=None, dummy_input=None, traced_model=None): """ This model analyze the channel dependencis between the conv @@ -29,13 +50,7 @@ def __init__(self, model=None, dummy_input=None, traced_model=None): if we alreay has the traced graph of the target model, we donnot need to trace the model again. """ - # check if the input is legal - if traced_model is None: - # user should provide model & dummy_input to trace the model or a already traced model - assert model is not None and dummy_input is not None - self.graph = TorchModuleGraph(model, dummy_input, traced_model) - self.dependency = dict() - self.build_channel_dependency() + super(ChannelDependency, self).__init__(model, dummy_input, traced_model) def _get_parent_layers(self, node): """ @@ -66,7 +81,7 @@ def _get_parent_layers(self, node): queue.append(parent) return parent_layers - def build_channel_dependency(self): + def build_dependency(self): """ Build the channel dependency for the conv layers in the model. 
@@ -119,7 +134,7 @@ def export(self, filepath): Set 2,layer1.0.conv1 Set 3,layer1.1.conv1 """ - header = ['Dependency Set', 'Convolutional Layers'] + header = ['Dependency Set', 'Layers'] setid = 0 visited = set() with open(filepath, 'w') as csvf: @@ -166,3 +181,200 @@ def dependency_sets(self): tmp_set.add(other) d_sets.append(tmp_set) return d_sets + +class CatPaddingDependency(ChannelDependency): + def __init__(self, model=None, dummy_input=None, traced_model=None): + super(CatPaddingDependency, self).__init__(model, dummy_input, traced_model) + + def build_dependency(self): + """ + Build the cat padding dependencies. + If the output features of several layers are stitched together + by cat operation, then these layers have cat padding dependencies. + This is because when inferring the cat mask, we need all the input + masks for the cat operation. At this time we need to know the source + of all input vectors of a cat operation. + """ + for node in self.graph.nodes_py.nodes_op: + parent_layers = [] + if node.op_type == CAT_TYPE: + parent_layers = self._get_parent_layers(node) + dependency_set = set(parent_layers) + # merge the dependencies + for parent in parent_layers: + if parent in self.dependency: + dependency_set.update(self.dependency[parent]) + # save the dependencies + for _node in dependency_set: + self.dependency[_node] = dependency_set + + @property + def dependency_sets(self): + d_sets = [] + visited = set() + for nodename in self.dependency: + if nodename in visited: + continue + d_sets.append(self.dependency[nodename]) + return d_sets + + def export(self, filepath): + """ + Export the dependencies into a file. + In the output file, each line contains a set of layers + whose output features are stitched together by the cat + operation. 
+ + output example: + Dependency Set, Layers + set1, Conv1, Conv2 + set2, Conv3, Conv4 + """ + header = ['Dependency Set', 'Layers'] + setid = 0 + with open(filepath, 'w') as csvf: + csv_w = csv.writer(csvf, delimiter=',') + csv_w.writerow(header) + for layers in self.dependency_sets: + setid += 1 + row = ['Set %d' % setid] + row.extend(list(layers)) + csv_w.writerow(row) + +class GroupDependency(Dependency): + def __init__(self, model=None, dummy_input=None, traced_model=None): + """ + This model analyze the group dependencis between the conv + layers in a model. + + Parameters + ---------- + model : torch.nn.Module + The model to be analyzed. + data : torch.Tensor + The example input data to trace the network architecture. + traced_model : torch._C.Graph + if we alreay has the traced graph of the target model, we donnot + need to trace the model again. + """ + super(GroupDependency, self).__init__(model, dummy_input, traced_model) + + def _get_parent_convs(self, node): + """ + Find the nearest father conv layers for the target node. + + Parameters + --------- + node : torch._C.Node + target node. + + Returns + ------- + parent_layers : list + nearest father conv layers for the target node. Due to the group + dependency only exists between the conv layers, so we only find + the parent conv layers. + """ + parent_layers = [] + # the input node is a Conv node + predeessors = self.graph.find_predecessors(node.unique_name) + predeessors = [self.graph.name_to_node[x] for x in predeessors] + queue = predeessors + while queue: + curnode = queue.pop(0) + if curnode.op_type == 'Conv2d': + # find the first met conv + parent_layers.append(curnode.name) + continue + parents = self.graph.find_predecessors(curnode.unique_name) + parents = [self.graph.name_to_node[name] for name in parents] + for parent in parents: + queue.append(parent) + return parent_layers + + def _get_conv_groups(self, node_group): + """ + Get the number of groups for a convolutional layer. 
+ + Parameters + ---------- + node_group : NodePyGroup + target node. + + Returns + ------- + group : int + the number of the groups of the target conv layer. + """ + cpp_conv = list(filter(lambda x: x.kind() == CONV_TYPE, node_group.node_cpps)) + assert len(cpp_conv) == 1 + cpp_conv = cpp_conv[0] + inputs = list(cpp_conv.inputs()) + # get the number of the group from the input parameters + group = inputs[8].toIValue() + return group + + def build_dependency(self): + """ + Build the channel dependency for the conv layers + in the model. This function return the group number + of each conv layers. Note that, here, the group count + of conv layers may be larger than their originl groups. + This is because that the input channel will also be grouped + for the group conv layers. To make this clear, assume we + have two group conv layers: conv1(group=2), conv2(group=4). + conv2 takes the output features of conv1 as input. + Then we have to the filters of conv1 can still be + divided into 4 groups after filter pruning, because + the input channels of conv2 shoule be divided into + 4 groups. + + Returns + ------- + self.dependency : dict + key: the name of conv layers, value: the minimum value that the number of + filters should be divisible to. + """ + for node in self.graph.nodes_py.nodes_op: + if node.op_type == 'Conv2d': + group = self._get_conv_groups(node) + if node.name in self.dependency: + # the conv layer whose group is larger than 1 will require that + # it's number of output channel to be divisible by the number of group. + self.dependency[node.name] = max(self.dependency[node.name], group) + else: + self.dependency[node.name] = group + if group > 1: + # for the conv layer whose group is larger than 1, it will require the number + # of output channels of their parent conv layer to be divisible by group. 
+ parent_convs = self._get_parent_convs(node) + for parent in parent_convs: + if parent in self.dependency: + self.dependency[parent] = max(self.dependency[parent], group) + else: + self.dependency[parent] = group + return self.dependency + + def export(self, filepath): + """ + export the group dependency to a csv file. + Each line describes a convolution layer, the + first part of each line is the Pytorch module + name of the conv layer. The second part of each + line is the group count of the filters in this layer. + Note that, the group count may be larger than this + layers original group number. + + output example: + Conv layer, Groups + Conv1, 1 + Conv2, 2 + Conv3, 4 + """ + header = ['Conv Layer Name', 'Group'] + with open(filepath, 'w') as csvf: + csv_w = csv.writer(csvf, delimiter=',') + csv_w.writerow(header) + for name in self.dependency: + group = self.dependency[name] + csv_w.writerow([name, group]) diff --git a/src/sdk/pynni/tests/test_compression_utils.py b/src/sdk/pynni/tests/test_compression_utils.py index 803666a50c..90c88db573 100644 --- a/src/sdk/pynni/tests/test_compression_utils.py +++ b/src/sdk/pynni/tests/test_compression_utils.py @@ -11,13 +11,13 @@ from nni.compression.torch import L1FilterPruner from nni.compression.torch.utils.shape_dependency import ChannelDependency -from nni.compression.torch.utils.mask_conflict import MaskConflict +from nni.compression.torch.utils.mask_conflict import fix_mask_conflict device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') prefix = 'analysis_test' model_names = ['alexnet', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg19', 'resnet18', 'resnet34', 'squeezenet1_1', - 'shufflenet_v2_x1_0', 'mobilenet_v2', 'wide_resnet50_2'] + 'mobilenet_v2', 'wide_resnet50_2'] channel_dependency_ground_truth = { 'resnet18': [{'layer1.0.conv2', 'layer1.1.conv2', 'conv1'}, @@ -49,8 +49,12 @@ 'vgg13': [], 'vgg19': [], 'squeezenet1_1': [], - 'googlenet': [], - 'shufflenet_v2_x1_0': [] + 'googlenet': [] + # comments the 
shufflenet temporary + # because it has the listunpack operation which + # will lead to a graph construction error. + # support the listunpack in the next release. + # 'shufflenet_v2_x1_0': [] } unittest.TestLoader.sortTestMethodsUsing = None @@ -111,9 +115,8 @@ def test_mask_conflict(self): pruner.export_model(ck_file, mask_file) pruner._unwrap_model() # Fix the mask conflict - mf = MaskConflict(mask_file, net, dummy_input) - fixed_mask = mf.fix_mask_conflict() - mf.export(os.path.join(outdir, '%s_fixed_mask' % name)) + fixed_mask = fix_mask_conflict(mask_file, net, dummy_input) + # use the channel dependency groud truth to check if # fix the mask conflict successfully for dset in channel_dependency_ground_truth[name]: diff --git a/src/sdk/pynni/tests/test_model_speedup.py b/src/sdk/pynni/tests/test_model_speedup.py index e33bd70b10..a06f991c97 100644 --- a/src/sdk/pynni/tests/test_model_speedup.py +++ b/src/sdk/pynni/tests/test_model_speedup.py @@ -4,6 +4,7 @@ import os import numpy as np import torch +import torchvision.models as models import torch.nn as nn import torch.nn.functional as F from torchvision.models.vgg import vgg16 @@ -13,7 +14,17 @@ from nni.compression.torch import L1FilterPruner, apply_compression_results, ModelSpeedup torch.manual_seed(0) - +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +BATCH_SIZE = 2 +# the relative distance +RELATIVE_THRESHOLD = 0.01 +# Because of the precision of floating-point numbers, some errors +# between the original output tensors(without speedup) and the output +# tensors of the speedup model are normal. When the output tensor itself +# is small, such errors may exceed the relative threshold, so we also add +# an absolute threshold to determine whether the final result is correct. +# The error should meet the RELATIVE_THREHOLD or the ABSOLUTE_THRESHOLD. 
+ABSOLUTE_THRESHOLD = 0.0001 class BackboneModel1(nn.Module): def __init__(self): super().__init__() @@ -72,6 +83,27 @@ def prune_model_l1(model): pruner.compress() pruner.export_model(model_path=MODEL_FILE, mask_path=MASK_FILE) +def generate_random_sparsity(model): + cfg_list = [] + for name, module in model.named_modules(): + if isinstance(module, nn.Conv2d): + sparsity = np.random.uniform(0.5, 0.99) + cfg_list.append({'op_types': ['Conv2d'], 'op_names': [name], + 'sparsity': sparsity}) + return cfg_list + +def zero_bn_bias(model): + with torch.no_grad(): + for name, module in model.named_modules(): + if isinstance(module, nn.BatchNorm2d) \ + or isinstance(module, nn.BatchNorm3d) \ + or isinstance(module, nn.BatchNorm1d): + shape = module.bias.data.size() + device = module.bias.device + module.bias.data = torch.zeros(shape).to(device) + shape = module.running_mean.data.size() + module.running_mean = torch.zeros(shape).to(device) + class SpeedupTestCase(TestCase): def test_speedup_vgg16(self): prune_model_l1(vgg16()) @@ -85,10 +117,6 @@ def test_speedup_vgg16(self): assert model.features[2].out_channels == int(orig_model.features[2].out_channels * SPARSITY) assert model.classifier[0].in_features == int(orig_model.classifier[0].in_features * SPARSITY) - #def test_speedup_resnet(self): - #TODO support resnet - #model = resnet18() - def test_speedup_bigmodel(self): prune_model_l1(BigModel()) model = BigModel() @@ -116,6 +144,36 @@ def test_speedup_bigmodel(self): assert model.backbone2.conv2.out_channels == int(orig_model.backbone2.conv2.out_channels * SPARSITY) assert model.backbone2.fc1.in_features == int(orig_model.backbone2.fc1.in_features * SPARSITY) + def test_speedup_integration(self): + for model_name in ['resnet18', 'squeezenet1_1', 'mobilenet_v2']: + Model = getattr(models, model_name) + net = Model(pretrained=True, progress=False).to(device) + net.eval() # this line is necessary + # random generate the prune config for the pruner + cfgs = 
generate_random_sparsity(net) + pruner = L1FilterPruner(net, cfgs) + pruner.compress() + pruner.export_model(MODEL_FILE, MASK_FILE) + pruner._unwrap_model() + speedup_model = Model().to(device) + speedup_model.eval() + state_dict = torch.load(MODEL_FILE) + speedup_model.load_state_dict(state_dict) + zero_bn_bias(net) + zero_bn_bias(speedup_model) + + data = torch.ones(BATCH_SIZE, 3, 224, 224).to(device) + ms = ModelSpeedup(speedup_model, data, MASK_FILE) + ms.speedup_model() + ori_out = net(data) + speeded_out = speedup_model(data) + ori_sum = torch.sum(ori_out).item() + speeded_sum = torch.sum(speeded_out).item() + print('Sum of the output of %s (before speedup):'%model_name, ori_sum) + print('Sum of the output of %s (after speedup):'%model_name, speeded_sum) + assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \ + (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD) + def tearDown(self): os.remove(MODEL_FILE) os.remove(MASK_FILE) From eee2f532c120f3ef3b3edfd709f1a61caea827a4 Mon Sep 17 00:00:00 2001 From: Ningxin Zheng <49771382+zheng-ningxin@users.noreply.github.com> Date: Sat, 27 Jun 2020 10:46:29 +0800 Subject: [PATCH 14/17] Add schema and tesorboard into the doc building dependency. 
(#2594) --- docs/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 54e21d307c..8a976b3348 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,5 +9,7 @@ json_tricks numpy scipy coverage +schema +tensorboard scikit-learn==0.20 https://download.pytorch.org/whl/cpu/torch-1.3.1%2Bcpu-cp37-cp37m-linux_x86_64.whl From 9a1fb17bd46c30110ebeadecda7d2adf9c26b289 Mon Sep 17 00:00:00 2001 From: QuanluZhang Date: Sat, 27 Jun 2020 12:39:31 +0800 Subject: [PATCH 15/17] support tf2 NAS with non-weight-sharing mode (#2541) --- docs/en_US/NAS/ClassicNas.md | 33 +++ docs/en_US/NAS/NasGuide.md | 113 +-------- docs/en_US/NAS/Overview.md | 54 +++-- docs/en_US/NAS/QuickStart.md | 66 ------ docs/en_US/NAS/WriteSearchSpace.md | 66 ++++++ docs/en_US/NAS/one_shot_nas.rst | 17 ++ docs/en_US/Tuner/BuiltinTuner.md | 2 +- docs/en_US/nas.rst | 12 +- examples/nas/classic_nas-tf/config_ppo.yml | 18 ++ .../classic_nas-tf/config_random_search.yml | 18 ++ examples/nas/classic_nas-tf/train.py | 130 +++++++++++ ...onfig_nas.yml => config_random_search.yml} | 0 .../nas/tensorflow/classic_nas/__init__.py | 4 + .../nni/nas/tensorflow/classic_nas/mutator.py | 215 ++++++++++++++++++ src/sdk/pynni/nni/nas/tensorflow/mutables.py | 13 ++ src/sdk/pynni/nni/ppo_tuner/distri.py | 3 +- src/sdk/pynni/nni/ppo_tuner/model.py | 3 +- src/sdk/pynni/nni/ppo_tuner/policy.py | 3 +- src/sdk/pynni/nni/ppo_tuner/util.py | 3 +- test/config/examples/classic-nas-pytorch.yml | 21 ++ test/config/examples/classic-nas-tf2.yml | 21 ++ test/config/integration_tests.yml | 17 ++ test/config/integration_tests_tf2.yml | 18 ++ test/nni_test/nnitest/run_tests.py | 4 + test/scripts/nas.sh | 2 +- 25 files changed, 649 insertions(+), 207 deletions(-) create mode 100644 docs/en_US/NAS/ClassicNas.md delete mode 100644 docs/en_US/NAS/QuickStart.md create mode 100644 docs/en_US/NAS/WriteSearchSpace.md create mode 100644 docs/en_US/NAS/one_shot_nas.rst create mode 
100644 examples/nas/classic_nas-tf/config_ppo.yml create mode 100644 examples/nas/classic_nas-tf/config_random_search.yml create mode 100644 examples/nas/classic_nas-tf/train.py rename examples/nas/classic_nas/{config_nas.yml => config_random_search.yml} (100%) create mode 100644 src/sdk/pynni/nni/nas/tensorflow/classic_nas/__init__.py create mode 100644 src/sdk/pynni/nni/nas/tensorflow/classic_nas/mutator.py create mode 100644 test/config/examples/classic-nas-pytorch.yml create mode 100644 test/config/examples/classic-nas-tf2.yml diff --git a/docs/en_US/NAS/ClassicNas.md b/docs/en_US/NAS/ClassicNas.md new file mode 100644 index 0000000000..6185d48d32 --- /dev/null +++ b/docs/en_US/NAS/ClassicNas.md @@ -0,0 +1,33 @@ +# Classic NAS Algorithms + +In classic NAS algorithms, each architecture is trained as a trial and the NAS algorithm acts as a tuner. Thus, this training mode naturally fits within the NNI hyper-parameter tuning framework, where Tuner generates new architecture for the next trial and trials run in the training service. + +## Quick Start + +The following example shows how to use classic NAS algorithms. You can see it is quite similar to NNI hyper-parameter tuning. + +```python +model = Net() + +# get the chosen architecture from tuner and apply it on model +get_and_apply_next_architecture(model) +train(model) # your code for training the model +acc = test(model) # test the trained model +nni.report_final_result(acc) # report the performance of the chosen architecture +``` + +First, instantiate the model. Search space has been defined in this model through `LayerChoice` and `InputChoice`. After that, user should invoke `get_and_apply_next_architecture(model)` to settle down to a specific architecture. This function receives the architecture from tuner (i.e., the classic NAS algorithm) and applies the architecture to `model`. At this point, `model` becomes a specific architecture rather than a search space. 
Then users are free to train this model just like training a normal PyTorch model. After getting the accuracy of this model, users should invoke `nni.report_final_result(acc)` to report the result to the tuner. + +At this point, trial code is ready. Then, we can prepare an NNI experiment, i.e., search space file and experiment config file. Different from NNI hyper-parameter tuning, search space file is automatically generated from the trial code by running the command (the detailed usage of this command can be found [here](../Tutorial/Nnictl.md)): + +`nnictl ss_gen --trial_command="the command for running your trial code"` + +A file named `nni_auto_gen_search_space.json` is generated by this command. Then put the path of the generated search space in the field `searchSpacePath` of the experiment config file. The other fields of the config file can be filled by referring to [this tutorial](../Tutorial/QuickStart.md). + +Currently, we only support [PPO Tuner](../Tuner/BuiltinTuner.md) and [random tuner](https://github.com/microsoft/nni/tree/master/examples/tuners/random_nas_tuner) for classic NAS. More classic NAS algorithms will be supported soon. + +The complete examples can be found [here](https://github.com/microsoft/nni/tree/master/examples/nas/classic_nas) for PyTorch and [here](https://github.com/microsoft/nni/tree/master/examples/nas/classic_nas-tf) for TensorFlow. + +## Standalone mode for easy debugging + +We support a standalone mode for easy debugging, where you can directly run the trial command without launching an NNI experiment. This is for checking whether your trial code can correctly run. The first candidate(s) are chosen for `LayerChoice` and `InputChoice` in this standalone mode.
\ No newline at end of file diff --git a/docs/en_US/NAS/NasGuide.md b/docs/en_US/NAS/NasGuide.md index 4af65e309a..ffcdadb2d4 100644 --- a/docs/en_US/NAS/NasGuide.md +++ b/docs/en_US/NAS/NasGuide.md @@ -1,85 +1,10 @@ -# Guide: Using NAS on NNI +# One-shot NAS algorithms -```eval_rst -.. contents:: - -.. Note:: The APIs are in an experimental stage. The current programing interface is subject to change. -``` - -![](../../img/nas_abstract_illustration.png) - -Modern Neural Architecture Search (NAS) methods usually incorporate [three dimensions][1]: search space, search strategy, and performance estimation strategy. Search space often contains a limited number of neural network architectures to explore, while the search strategy samples architectures from search space, gets estimations of their performance, and evolves itself. Ideally, the search strategy should find the best architecture in the search space and report it to users. After users obtain the "best architecture", many methods use a "retrain step", which trains the network with the same pipeline as any traditional model. - -## Implement a Search Space - -Assuming we've got a baseline model, what should we do to be empowered with NAS? Take [MNIST on PyTorch](https://github.com/pytorch/examples/blob/master/mnist/main.py) as an example, the code might look like this: - -```python -from nni.nas.pytorch import mutables - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = mutables.LayerChoice([ - nn.Conv2d(1, 32, 3, 1), - nn.Conv2d(1, 32, 5, 3) - ]) # try 3x3 kernel and 5x5 kernel - self.conv2 = nn.Conv2d(32, 64, 3, 1) - self.dropout1 = nn.Dropout2d(0.25) - self.dropout2 = nn.Dropout2d(0.5) - self.fc1 = nn.Linear(9216, 128) - self.fc2 = nn.Linear(128, 10) - - def forward(self, x): - x = self.conv1(x) - x = F.relu(x) - # ... same as original ... - return output -``` - -The example above adds an option of choosing conv5x5 at conv1. 
The modification is as simple as declaring a `LayerChoice` with the original conv3x3 and a new conv5x5 as its parameter. That's it! You don't have to modify the forward function in any way. You can imagine conv1 as any other module without NAS. - -So how about the possibilities of connections? This can be done using `InputChoice`. To allow for a skip connection on the MNIST example, we add another layer called conv3. In the following example, a possible connection from conv2 is added to the output of conv3. - -```python -from nni.nas.pytorch import mutables - -class Net(nn.Module): - def __init__(self): - # ... same ... - self.conv2 = nn.Conv2d(32, 64, 3, 1) - self.conv3 = nn.Conv2d(64, 64, 1, 1) - # declaring that there is exactly one candidate to choose from - # search strategy will choose one or None - self.skipcon = mutables.InputChoice(n_candidates=1) - # ... same ... - - def forward(self, x): - x = self.conv1(x) - x = F.relu(x) - x = self.conv2(x) - x0 = self.skipcon([x]) # choose one or none from [x] - x = self.conv3(x) - if x0 is not None: # skipconnection is open - x += x0 - x = F.max_pool2d(x, 2) - # ... same ... - return output -``` - -Input choice can be thought of as a callable module that receives a list of tensors and outputs the concatenation/sum/mean of some of them (sum by default), or `None` if none is selected. Like layer choices, input choices should be **initialized in `__init__` and called in `forward`**. We will see later that this is to allow search algorithms to identify these choices and do necessary preparations. - -`LayerChoice` and `InputChoice` are both **mutables**. Mutable means "changeable". As opposed to traditional deep learning layers/modules which have fixed operation types once defined, models with mutable are essentially a series of possible models. - -Users can specify a **key** for each mutable. 
By default, NNI will assign one for you that is globally unique, but in case users want to share choices (for example, there are two `LayerChoice`s with the same candidate operations and you want them to have the same choice, i.e., if first one chooses the i-th op, the second one also chooses the i-th op), they can give them the same key. The key marks the identity for this choice and will be used in the dumped checkpoint. So if you want to increase the readability of your exported architecture, manually assigning keys to each mutable would be a good idea. For advanced usage on mutables, see [Mutables](./NasReference.md). - -## Use a Search Algorithm - -Aside from using a search space, there are at least two other ways users can do search. One runs NAS distributedly, which can be as naive as enumerating all the architectures and training each one from scratch, or can involve leveraging more advanced technique, such as [SMASH][8], [ENAS][2], [DARTS][1], [FBNet][3], [ProxylessNAS][4], [SPOS][5], [Single-Path NAS][6], [Understanding One-shot][7] and [GDAS][9]. Since training many different architectures is known to be expensive, another family of methods, called one-shot NAS, builds a supernet containing every candidate in the search space as its subnetwork, and in each step, a subnetwork or combination of several subnetworks is trained. +Besides [classic NAS algorithms](./ClassicNas.md), users also apply more advanced one-shot NAS algorithms to find better models from a search space. There are lots of related works about one-shot NAS algorithms, such as [SMASH][8], [ENAS][2], [DARTS][1], [FBNet][3], [ProxylessNAS][4], [SPOS][5], [Single-Path NAS][6], [Understanding One-shot][7] and [GDAS][9]. One-shot NAS algorithms usually build a supernet containing every candidate in the search space as its subnetwork, and in each step, a subnetwork or combination of several subnetworks is trained. Currently, several one-shot NAS methods are supported on NNI. 
For example, `DartsTrainer`, which uses SGD to train architecture weights and model weights iteratively, and `ENASTrainer`, which [uses a controller to train the model][2]. New and more efficient NAS trainers keep emerging in research community and some will be implemented in future releases of NNI. -### One-Shot NAS +## Search with One-shot NAS Algorithms Each one-shot NAS algorithm implements a trainer, for which users can find usage details in the description of each algorithm. Here is a simple example, demonstrating how users can use `EnasTrainer`. @@ -100,7 +25,7 @@ def top1_accuracy(output, target): def metrics_fn(output, target): # metrics function receives output and target and computes a dict of metrics - return {"acc1": reward_accuracy(output, target)} + return {"acc1": top1_accuracy(output, target)} from nni.nas.pytorch import enas trainer = enas.EnasTrainer(model, @@ -117,35 +42,13 @@ trainer.train() # training trainer.export(file="model_dir/final_architecture.json") # export the final architecture to file ``` -Users can directly run their training file through `python3 train.py` without `nnictl`. After training, users can export the best one of the found models through `trainer.export()`. - -Normally, the trainer exposes a few arguments that you can customize. For example, the loss function, the metrics function, the optimizer, and the datasets. These should satisfy most usages needs and we do our best to make sure our built-in trainers work on as many models, tasks, and datasets as possible. But there is no guarantee. For example, some trainers have the assumption that the task is a classification task; some trainers might have a different definition of "epoch" (e.g., an ENAS epoch = some child steps + some controller steps); most trainers do not have support for distributed training: they won't wrap your model with `DataParallel` or `DistributedDataParallel` to do that. 
So after a few tryouts, if you want to actually use the trainers on your very customized applications, you might need to [customize your trainer](./Advanced.md#extend-the-ability-of-one-shot-trainers). - -Furthermore, one-shot NAS can be visualized with our NAS UI. [See more details.](./Visualization.md) - -### Distributed NAS - -Neural architecture search was originally executed by running each child model independently as a trial job. We also support this searching approach, and it naturally fits within the NNI hyper-parameter tuning framework, where Tuner generates child models for the next trial and trials run in the training service. - -To use this mode, there is no need to change the search space expressed with the NNI NAS API (i.e., `LayerChoice`, `InputChoice`, `MutableScope`). After the model is initialized, apply the function `get_and_apply_next_architecture` on the model. One-shot NAS trainers are not used in this mode. Here is a simple example: - -```python -model = Net() - -# get the chosen architecture from tuner and apply it on model -get_and_apply_next_architecture(model) -train(model) # your code for training the model -acc = test(model) # test the trained model -nni.report_final_result(acc) # report the performance of the chosen architecture -``` - -The search space should be generated and sent to Tuner. As with the NNI NAS API, the search space is embedded in the user code. Users can use "[nnictl ss_gen](../Tutorial/Nnictl.md)" to generate the search space file. Then put the path of the generated search space in the field `searchSpacePath` of `config.yml`. The other fields in `config.yml` can be filled by referring [this tutorial](../Tutorial/QuickStart.md). +`model` is the one with [user defined search space](./WriteSearchSpace.md). Then users should prepare training data and model evaluation metrics. To search from the defined search space, a one-shot algorithm is instantiated, called trainer (e.g., EnasTrainer). 
The trainer exposes a few arguments that you can customize. For example, the loss function, the metrics function, the optimizer, and the datasets. These should satisfy most usage requirements and we do our best to make sure our built-in trainers work on as many models, tasks, and datasets as possible. -You can use the [NNI tuners](../Tuner/BuiltinTuner.md) to do the search. Currently, only PPO Tuner supports NAS search spaces. +**Note that** when using one-shot NAS algorithms, there is no need to start an NNI experiment. Users can directly run this Python script (i.e., `train.py`) through `python3 train.py` without `nnictl`. After training, users can export the best one of the found models through `trainer.export()`. -We support a standalone mode for easy debugging, where you can directly run the trial command without launching an NNI experiment. This is for checking whether your trial code can correctly run. The first candidate(s) are chosen for `LayerChoice` and `InputChoice` in this standalone mode. +Each trainer in NNI has its targeted scenario and usage. Some trainers have the assumption that the task is a classification task; some trainers might have a different definition of "epoch" (e.g., an ENAS epoch = some child steps + some controller steps). Most trainers do not have support for distributed training: they won't wrap your model with `DataParallel` or `DistributedDataParallel` to do that. So after a few tryouts, if you want to actually use the trainers on your very customized applications, you might need to [customize your trainer](./Advanced.md#extend-the-ability-of-one-shot-trainers). -A complete example can be found [here](https://github.com/microsoft/nni/tree/master/examples/nas/classic_nas/config_nas.yml). +Furthermore, one-shot NAS can be visualized with our NAS UI. 
[See more details.](./Visualization.md) ### Retrain with Exported Architecture diff --git a/docs/en_US/NAS/Overview.md b/docs/en_US/NAS/Overview.md index 8fac582656..adf1491928 100644 --- a/docs/en_US/NAS/Overview.md +++ b/docs/en_US/NAS/Overview.md @@ -1,28 +1,47 @@ # Neural Architecture Search (NAS) on NNI +```eval_rst +.. contents:: +``` + +## Overview + Automatic neural architecture search is taking an increasingly important role in finding better models. Recent research has proved the feasibility of automatic NAS and has lead to models that beat many manually designed and tuned models. Some representative works are [NASNet][2], [ENAS][1], [DARTS][3], [Network Morphism][4], and [Evolution][5]. Further, new innovations keep emerging. However, it takes a great effort to implement NAS algorithms, and it's hard to reuse the code base of existing algorithms for new ones. To facilitate NAS innovations (e.g., the design and implementation of new NAS models, the comparison of different NAS models side-by-side, etc.), an easy-to-use and flexible programming interface is crucial. With this motivation, our ambition is to provide a unified architecture in NNI, accelerate innovations on NAS, and apply state-of-the-art algorithms to real-world problems faster. -With the unified interface, there are two different modes for architecture search. [One](#supported-one-shot-nas-algorithms) is the so-called one-shot NAS, where a super-net is built based on a search space and one-shot training is used to generate a good-performing child model. [The other](#supported-distributed-nas-algorithms) is the traditional search-based approach, where each child model within the search space runs as an independent trial. The performance result is then sent to Tuner and the tuner generates a new child model. +With the unified interface, there are two different modes for architecture search. 
[One](#supported-one-shot-nas-algorithms) is the so-called one-shot NAS, where a super-net is built based on a search space and one-shot training is used to generate a good-performing child model. [The other](#supported-classic-nas-algorithms) is the traditional search-based approach, where each child model within the search space runs as an independent trial. We call it classic NAS. + +NNI also provides a dedicated [visualization tool](#nas-visualization) for users to check the status of the neural architecture search process. + +## Supported Classic NAS Algorithms + +The procedure of classic NAS algorithms is similar to hyper-parameter tuning, users use `nnictl` to start experiments and each model runs as a trial. The difference is that search space file is automatically generated from user model (with search space in it) by running `nnictl ss_gen`. The following table lists the supported tuning algorithms for classic NAS mode. More algorithms will be supported in future releases. + +|Name|Brief Introduction of Algorithm| +|---|---| +| [Random Search](https://github.com/microsoft/nni/tree/master/examples/tuners/random_nas_tuner) | Randomly pick a model from search space | +| [PPO Tuner](https://nni.readthedocs.io/en/latest/Tuner/BuiltinTuner.html#PPOTuner) | PPO Tuner is a Reinforcement Learning tuner based on PPO algorithm. [Reference Paper](https://arxiv.org/abs/1707.06347) | + +Please refer to [here](ClassicNas.md) for the usage of classic NAS algorithms. ## Supported One-shot NAS Algorithms -NNI currently supports the NAS algorithms listed below and is adding more. Users can reproduce an algorithm or use it on their own dataset. We also encourage users to implement other algorithms with [NNI API](#use-nni-api), to benefit more people. +NNI currently supports the one-shot NAS algorithms listed below and is adding more. Users can reproduce an algorithm or use it on their own dataset.
We also encourage users to implement other algorithms with [NNI API](#use-nni-api), to benefit more people. |Name|Brief Introduction of Algorithm| |---|---| -| [ENAS](ENAS.md) | [Efficient Neural Architecture Search via Parameter Sharing](https://arxiv.org/abs/1802.03268). In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. It uses parameter sharing between child models to achieve fast speed and excellent performance. | -| [DARTS](DARTS.md) | [DARTS: Differentiable Architecture Search](https://arxiv.org/abs/1806.09055) introduces a novel algorithm for differentiable network architecture search on bilevel optimization. | -| [P-DARTS](PDARTS.md) | [Progressive Differentiable Architecture Search: Bridging the Depth Gap between Search and Evaluation](https://arxiv.org/abs/1904.12760) is based on DARTS. It introduces an efficient algorithm which allows the depth of searched architectures to grow gradually during the training procedure. | -| [SPOS](SPOS.md) | [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420) constructs a simplified supernet trained with a uniform path sampling method and applies an evolutionary algorithm to efficiently search for the best-performing architectures. | -| [CDARTS](CDARTS.md) | [Cyclic Differentiable Architecture Search](https://arxiv.org/abs/****) builds a cyclic feedback mechanism between the search and evaluation networks. It introduces a cyclic differentiable architecture search framework which integrates the two networks into a unified architecture.| -| [ProxylessNAS](Proxylessnas.md) | [ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware](https://arxiv.org/abs/1812.00332). It removes proxy, directly learns the architectures for large-scale target tasks and target hardware platforms. 
| -| [TextNAS](TextNAS.md) | [TextNAS: A Neural Architecture Search Space tailored for Text Representation](https://arxiv.org/pdf/1912.10729.pdf). It is a neural architecture search algorithm tailored for text representation. | +| [ENAS](https://nni.readthedocs.io/en/latest/NAS/ENAS.html) | [Efficient Neural Architecture Search via Parameter Sharing](https://arxiv.org/abs/1802.03268). In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. It uses parameter sharing between child models to achieve fast speed and excellent performance. | +| [DARTS](https://nni.readthedocs.io/en/latest/NAS/DARTS.html) | [DARTS: Differentiable Architecture Search](https://arxiv.org/abs/1806.09055) introduces a novel algorithm for differentiable network architecture search on bilevel optimization. | +| [P-DARTS](https://nni.readthedocs.io/en/latest/NAS/PDARTS.html) | [Progressive Differentiable Architecture Search: Bridging the Depth Gap between Search and Evaluation](https://arxiv.org/abs/1904.12760) is based on DARTS. It introduces an efficient algorithm which allows the depth of searched architectures to grow gradually during the training procedure. | +| [SPOS](https://nni.readthedocs.io/en/latest/NAS/SPOS.html) | [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420) constructs a simplified supernet trained with a uniform path sampling method and applies an evolutionary algorithm to efficiently search for the best-performing architectures. | +| [CDARTS](https://nni.readthedocs.io/en/latest/NAS/CDARTS.html) | [Cyclic Differentiable Architecture Search](https://arxiv.org/abs/****) builds a cyclic feedback mechanism between the search and evaluation networks. 
It introduces a cyclic differentiable architecture search framework which integrates the two networks into a unified architecture.| +| [ProxylessNAS](https://nni.readthedocs.io/en/latest/NAS/Proxylessnas.html) | [ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware](https://arxiv.org/abs/1812.00332). It removes proxy, directly learns the architectures for large-scale target tasks and target hardware platforms. | +| [TextNAS](https://nni.readthedocs.io/en/latest/NAS/TextNAS.html) | [TextNAS: A Neural Architecture Search Space tailored for Text Representation](https://arxiv.org/pdf/1912.10729.pdf). It is a neural architecture search algorithm tailored for text representation. | -One-shot algorithms run **standalone without nnictl**. Only the PyTorch version has been implemented. Tensorflow 2.x will be supported in a future release. +One-shot algorithms run **standalone without nnictl**. NNI supports both PyTorch and Tensorflow 2.X. Here are some common dependencies to run the examples. PyTorch needs to be above 1.2 to use ``BoolTensor``. @@ -30,26 +49,19 @@ Here are some common dependencies to run the examples. PyTorch needs to be above * PyTorch 1.2+ * git -One-shot NAS can be visualized with our visualization tool. Learn more details [here](./Visualization.md). - -## Supported Distributed NAS Algorithms +Please refer to [here](NasGuide.md) for the usage of one-shot NAS algorithms. -|Name|Brief Introduction of Algorithm| -|---|---| -| [SPOS's 2nd stage](SPOS.md) | [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420) constructs a simplified supernet trained with a uniform path sampling method, and applies an evolutionary algorithm to efficiently search for the best-performing architectures.| +One-shot NAS can be visualized with our visualization tool. Learn more details [here](./Visualization.md). -```eval_rst -.. 
Note:: SPOS is a two-stage algorithm, whose first stage is one-shot and the second stage is distributed, leveraging the result of the first stage as a checkpoint. -``` -## Using the NNI API +## Using NNI API to Write Your Search Space The programming interface of designing and searching a model is often demanded in two scenarios. 1. When designing a neural network, there may be multiple operation choices on a layer, sub-model, or connection, and it's undetermined which one or combination performs best. So, it needs an easy way to express the candidate layers or sub-models. 2. When applying NAS on a neural network, it needs a unified way to express the search space of architectures, so that it doesn't need to update trial code for different search algorithms. -[Here](./NasGuide.md) is the user guide to get started with using NAS on NNI. +For using NNI NAS, we suggest users to first go through [the tutorial of NAS API for building search space](./WriteSearchSpace.md). ## NAS Visualization diff --git a/docs/en_US/NAS/QuickStart.md b/docs/en_US/NAS/QuickStart.md deleted file mode 100644 index d8c77a7950..0000000000 --- a/docs/en_US/NAS/QuickStart.md +++ /dev/null @@ -1,66 +0,0 @@ -# NAS Quick Start - -The NAS feature provided by NNI has two key components: APIs for expressing the search space and NAS training approaches. The former is for users to easily specify a class of models (i.e., the candidate models specified by the search space) which may perform well. The latter is for users to easily apply state-of-the-art NAS training approaches on their own model. - -Here we use a simple example to demonstrate how to tune your model architecture with the NNI NAS APIs step by step. The complete code of this example can be found [here](https://github.com/microsoft/nni/tree/master/examples/nas/naive). 
- -## Write your model with NAS APIs - -Instead of writing a concrete neural model, you can write a class of neural models using two of the NAS APIs library functions, `LayerChoice` and `InputChoice`. For example, if you think either of two options might work in the first convolution layer, then you can get one from them using `LayerChoice` as shown by `self.conv1` in the code. Similarly, the second convolution layer `self.conv2` also chooses one from two options. To this line, four candidate neural networks are specified. `self.skipconnect` uses `InputChoice` to specify two choices, adding a skip connection or not. - -```python -import torch.nn as nn -from nni.nas.pytorch.mutables import LayerChoice, InputChoice - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = LayerChoice([nn.Conv2d(3, 6, 3, padding=1), nn.Conv2d(3, 6, 5, padding=2)]) - self.pool = nn.MaxPool2d(2, 2) - self.conv2 = LayerChoice([nn.Conv2d(6, 16, 3, padding=1), nn.Conv2d(6, 16, 5, padding=2)]) - self.conv3 = nn.Conv2d(16, 16, 1) - - self.skipconnect = InputChoice(n_candidates=1) - self.bn = nn.BatchNorm2d(16) - - self.gap = nn.AdaptiveAvgPool2d(4) - self.fc1 = nn.Linear(16 * 4 * 4, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) -``` - -For a detailed description of `LayerChoice` and `InputChoice`, please refer to [the NAS guide](NasGuide.md) - -## Choose a NAS trainer - -After the model is instantiated, it is time to train the model using a NAS trainer. Different trainers use different approaches to search for the best one from a class of neural models that you specified. NNI provides several popular NAS training approaches such as DARTS and ENAS. Here we use `DartsTrainer` in the example below. After the trainer is instantiated, invoke `trainer.train()` to do the search. 
- -```python -trainer = DartsTrainer(net, - loss=criterion, - metrics=accuracy, - optimizer=optimizer, - num_epochs=2, - dataset_train=dataset_train, - dataset_valid=dataset_valid, - batch_size=64, - log_frequency=10) -trainer.train() -``` - -## Export the best model - -After the search (i.e., `trainer.train()`) is done, to get the best performing model we simply call `trainer.export("final_arch.json")` to export the found neural architecture to a file. - -## NAS visualization - -We are working on NAS visualization and will release this feature soon. - -## Retrain the exported best model - -It is simple to retrain the found (exported) neural architecture. Step one, instantiate the model you defined above. Step two, invoke `apply_fixed_architecture` to the model. Then the model becomes the found (exported) one. Afterward, you can use traditional training to train this model. - -```python -model = Net() -apply_fixed_architecture(model, "final_arch.json") -``` diff --git a/docs/en_US/NAS/WriteSearchSpace.md b/docs/en_US/NAS/WriteSearchSpace.md new file mode 100644 index 0000000000..c466e476c1 --- /dev/null +++ b/docs/en_US/NAS/WriteSearchSpace.md @@ -0,0 +1,66 @@ +# Write A Search Space + +Generally, a search space describes candidate architectures from which users want to find the best one. Different search algorithms, no matter classic NAS or one-shot NAS, can be applied on the search space. NNI provides APIs to unify the expression of neural architecture search space. + +A search space can be built on a base model. This is also a common practice when a user wants to apply NAS on an existing model. Take [MNIST on PyTorch](https://github.com/pytorch/examples/blob/master/mnist/main.py) as an example. Note that NNI provides the same APIs for expressing search space on PyTorch and TensorFlow.
+ +```python +from nni.nas.pytorch import mutables + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = mutables.LayerChoice([ + nn.Conv2d(1, 32, 3, 1), + nn.Conv2d(1, 32, 5, 3) + ]) # try 3x3 kernel and 5x5 kernel + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout2d(0.25) + self.dropout2 = nn.Dropout2d(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + # ... same as original ... + return output +``` + +The example above adds an option of choosing conv5x5 at conv1. The modification is as simple as declaring a `LayerChoice` with the original conv3x3 and a new conv5x5 as its parameter. That's it! You don't have to modify the forward function in any way. You can imagine conv1 as any other module without NAS. + +So how about the possibilities of connections? This can be done using `InputChoice`. To allow for a skip connection on the MNIST example, we add another layer called conv3. In the following example, a possible connection from conv2 is added to the output of conv3. + +```python +from nni.nas.pytorch import mutables + +class Net(nn.Module): + def __init__(self): + # ... same ... + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.conv3 = nn.Conv2d(64, 64, 1, 1) + # declaring that there is exactly one candidate to choose from + # search strategy will choose one or None + self.skipcon = mutables.InputChoice(n_candidates=1) + # ... same ... + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x0 = self.skipcon([x]) # choose one or none from [x] + x = self.conv3(x) + if x0 is not None: # skipconnection is open + x += x0 + x = F.max_pool2d(x, 2) + # ... same ... + return output +``` + +Input choice can be thought of as a callable module that receives a list of tensors and outputs the concatenation/sum/mean of some of them (sum by default), or `None` if none is selected. 
Like layer choices, input choices should be **initialized in `__init__` and called in `forward`**. This is to allow search algorithms to identify these choices and do necessary preparations. + +`LayerChoice` and `InputChoice` are both **mutables**. Mutable means "changeable". As opposed to traditional deep learning layers/modules which have fixed operation types once defined, models with mutable are essentially a series of possible models. + +Users can specify a **key** for each mutable. By default, NNI will assign one for you that is globally unique, but in case users want to share choices (for example, there are two `LayerChoice`s with the same candidate operations and you want them to have the same choice, i.e., if first one chooses the i-th op, the second one also chooses the i-th op), they can give them the same key. The key marks the identity for this choice and will be used in the dumped checkpoint. So if you want to increase the readability of your exported architecture, manually assigning keys to each mutable would be a good idea. For advanced usage on mutables (e.g., `LayerChoice` and `InputChoice`), see [Mutables](./NasReference.md). + +With search space defined, the next step is searching for the best model from it. Please refer to [classic NAS algorithms](./ClassicNas.md) and [one-shot NAS algorithms](./NasGuide.md) for how to search from your defined search space. \ No newline at end of file diff --git a/docs/en_US/NAS/one_shot_nas.rst b/docs/en_US/NAS/one_shot_nas.rst new file mode 100644 index 0000000000..cc7fa688b6 --- /dev/null +++ b/docs/en_US/NAS/one_shot_nas.rst @@ -0,0 +1,17 @@ +One-shot NAS Algorithms +======================= + +One-shot NAS algorithms leverage weight sharing among models in neural architecture search space to train a supernet, and use this supernet to guide the selection of better models. 
This type of algorithm greatly reduces computational resources compared to independently training each model from scratch (which we call "Classic NAS"). NNI has supported many popular One-shot NAS algorithms as follows. + + +.. toctree:: + :maxdepth: 1 + + Quick Start + ENAS + DARTS + P-DARTS + SPOS + CDARTS + ProxylessNAS + TextNAS \ No newline at end of file diff --git a/docs/en_US/Tuner/BuiltinTuner.md b/docs/en_US/Tuner/BuiltinTuner.md index 2cff5cded3..516b5c2adc 100644 --- a/docs/en_US/Tuner/BuiltinTuner.md +++ b/docs/en_US/Tuner/BuiltinTuner.md @@ -421,7 +421,7 @@ tuner: > Built-in Tuner Name: **PPOTuner** -Note that the only acceptable types within the search space is `mutable_layer`. `optional_input_size` can only be 0, 1, or [0, 1]. +Note that the only acceptable types within the search space are `layer_choice` and `input_choice`. For `input_choice`, `n_chosen` can only be 0, 1, or [0, 1]. Note, the search space file for NAS is usually automatically generated through the command [`nnictl ss_gen`](../Tutorial/Nnictl.md).
**Suggested scenario** diff --git a/docs/en_US/nas.rst b/docs/en_US/nas.rst index cf2aa7f81a..c40224f681 100644 --- a/docs/en_US/nas.rst +++ b/docs/en_US/nas.rst @@ -18,15 +18,9 @@ For details, please refer to the following tutorials: :maxdepth: 2 Overview - Quick Start - Tutorial - ENAS - DARTS - P-DARTS - SPOS - CDARTS - ProxylessNAS - TextNAS + Write A Search Space + Classic NAS + One-shot NAS Customize a NAS Algorithm NAS Visualization API Reference diff --git a/examples/nas/classic_nas-tf/config_ppo.yml b/examples/nas/classic_nas-tf/config_ppo.yml new file mode 100644 index 0000000000..8725d20a0b --- /dev/null +++ b/examples/nas/classic_nas-tf/config_ppo.yml @@ -0,0 +1,18 @@ +authorName: default +experimentName: example_mnist +trialConcurrency: 1 +maxExecDuration: 100h +maxTrialNum: 1000 +#choice: local, remote, pai +trainingServicePlatform: local +#please use `nnictl ss_gen` to generate search space file first +searchSpacePath: nni_auto_gen_search_space.json +useAnnotation: False +tuner: + builtinTunerName: PPOTuner + classArgs: + optimize_mode: maximize +trial: + command: python3 train.py + codeDir: . + gpuNum: 0 diff --git a/examples/nas/classic_nas-tf/config_random_search.yml b/examples/nas/classic_nas-tf/config_random_search.yml new file mode 100644 index 0000000000..b7a04eb8ba --- /dev/null +++ b/examples/nas/classic_nas-tf/config_random_search.yml @@ -0,0 +1,18 @@ +authorName: default +experimentName: example_mnist +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: local +#please use `nnictl ss_gen` to generate search space file first +searchSpacePath: nni_auto_gen_search_space.json +useAnnotation: False +tuner: + codeDir: ../../tuners/random_nas_tuner + classFileName: random_nas_tuner.py + className: RandomNASTuner +trial: + command: python3 train.py + codeDir: . 
+ gpuNum: 0 diff --git a/examples/nas/classic_nas-tf/train.py b/examples/nas/classic_nas-tf/train.py new file mode 100644 index 0000000000..04bfd30a8e --- /dev/null +++ b/examples/nas/classic_nas-tf/train.py @@ -0,0 +1,130 @@ +import argparse +import tensorflow as tf +from tensorflow.keras import Model +from tensorflow.keras.layers import (AveragePooling2D, BatchNormalization, Conv2D, Dense, MaxPool2D) +from tensorflow.keras.losses import Reduction, SparseCategoricalCrossentropy +from tensorflow.keras.optimizers import SGD + +import nni +from nni.nas.tensorflow.mutables import LayerChoice, InputChoice +from nni.nas.tensorflow.classic_nas import get_and_apply_next_architecture + +tf.get_logger().setLevel('ERROR') + +class Net(Model): + def __init__(self): + super().__init__() + self.conv1 = LayerChoice([ + Conv2D(6, 3, padding='same', activation='relu'), + Conv2D(6, 5, padding='same', activation='relu'), + ]) + self.pool = MaxPool2D(2) + self.conv2 = LayerChoice([ + Conv2D(16, 3, padding='same', activation='relu'), + Conv2D(16, 5, padding='same', activation='relu'), + ]) + self.conv3 = Conv2D(16, 1) + + self.skipconnect = InputChoice(n_candidates=2, n_chosen=1) + self.bn = BatchNormalization() + + self.gap = AveragePooling2D(2) + self.fc1 = Dense(120, activation='relu') + self.fc2 = Dense(84, activation='relu') + self.fc3 = Dense(10) + + def call(self, x): + bs = x.shape[0] + + t = self.conv1(x) + x = self.pool(t) + x0 = self.conv2(x) + x1 = self.conv3(x0) + + x0 = self.skipconnect([x0, None]) + if x0 is not None: + x1 += x0 + x = self.pool(self.bn(x1)) + + x = self.gap(x) + x = tf.reshape(x, [bs, -1]) + x = self.fc1(x) + x = self.fc2(x) + x = self.fc3(x) + return x + +loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +def loss(model, x, y, training): + # training=training is needed only if there are layers with different + # behavior during training versus inference (e.g. Dropout). 
+ y_ = model(x, training=training) + + return loss_object(y_true=y, y_pred=y_) + +def grad(model, inputs, targets): + with tf.GradientTape() as tape: + loss_value = loss(model, inputs, targets, training=True) + return loss_value, tape.gradient(loss_value, model.trainable_variables) + +def train(net, train_dataset, optimizer, num_epochs): + train_loss_results = [] + train_accuracy_results = [] + + for epoch in range(num_epochs): + epoch_loss_avg = tf.keras.metrics.Mean() + epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy() + + for x, y in train_dataset: + loss_value, grads = grad(net, x, y) + optimizer.apply_gradients(zip(grads, net.trainable_variables)) + epoch_loss_avg.update_state(loss_value) + epoch_accuracy.update_state(y, net(x, training=True)) + + train_loss_results.append(epoch_loss_avg.result()) + train_accuracy_results.append(epoch_accuracy.result()) + + if epoch % 1 == 0: + print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, + epoch_loss_avg.result(), + epoch_accuracy.result())) + +def test(model, test_dataset): + test_accuracy = tf.keras.metrics.Accuracy() + + for (x, y) in test_dataset: + # training=False is needed only if there are layers with different + # behavior during training versus inference (e.g. Dropout). 
+ logits = model(x, training=False) + prediction = tf.argmax(logits, axis=1, output_type=tf.int32) + test_accuracy(prediction, y) + + print("Test set accuracy: {:.3%}".format(test_accuracy.result())) + return test_accuracy.result() + +if __name__ == '__main__': + # Training settings + parser = argparse.ArgumentParser(description='PyTorch MNIST Example') + parser.add_argument('--epochs', type=int, default=10, metavar='N', + help='number of epochs to train (default: 10)') + args, _ = parser.parse_known_args() + + cifar10 = tf.keras.datasets.cifar10 + (x_train, y_train), (x_test, y_test) = cifar10.load_data() + x_train, x_test = x_train / 255.0, x_test / 255.0 + split = int(len(x_train) * 0.9) + dataset_train = tf.data.Dataset.from_tensor_slices((x_train[:split], y_train[:split])).batch(64) + dataset_valid = tf.data.Dataset.from_tensor_slices((x_train[split:], y_train[split:])).batch(64) + dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(64) + + net = Net() + + get_and_apply_next_architecture(net) + + optimizer = tf.keras.optimizers.SGD(learning_rate=0.01) + + train(net, dataset_train, optimizer, args.epochs) + + acc = test(net, dataset_test) + + nni.report_final_result(acc.numpy()) diff --git a/examples/nas/classic_nas/config_nas.yml b/examples/nas/classic_nas/config_random_search.yml similarity index 100% rename from examples/nas/classic_nas/config_nas.yml rename to examples/nas/classic_nas/config_random_search.yml diff --git a/src/sdk/pynni/nni/nas/tensorflow/classic_nas/__init__.py b/src/sdk/pynni/nni/nas/tensorflow/classic_nas/__init__.py new file mode 100644 index 0000000000..ec3f5a4894 --- /dev/null +++ b/src/sdk/pynni/nni/nas/tensorflow/classic_nas/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +from .mutator import get_and_apply_next_architecture diff --git a/src/sdk/pynni/nni/nas/tensorflow/classic_nas/mutator.py b/src/sdk/pynni/nni/nas/tensorflow/classic_nas/mutator.py new file mode 100644 index 0000000000..4b9212f342 --- /dev/null +++ b/src/sdk/pynni/nni/nas/tensorflow/classic_nas/mutator.py @@ -0,0 +1,215 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import json +import logging +import os +import sys + +import tensorflow as tf + +import nni +from nni.env_vars import trial_env_vars +from nni.nas.tensorflow.mutables import LayerChoice, InputChoice, MutableScope +from nni.nas.tensorflow.mutator import Mutator + +logger = logging.getLogger(__name__) + +NNI_GEN_SEARCH_SPACE = "NNI_GEN_SEARCH_SPACE" +LAYER_CHOICE = "layer_choice" +INPUT_CHOICE = "input_choice" + + +def get_and_apply_next_architecture(model): + """ + Wrapper of :class:`~nni.nas.tensorflow.classic_nas.mutator.ClassicMutator` to make it more meaningful, + similar to ``get_next_parameter`` for HPO. + It will generate search space based on ``model``. + If env ``NNI_GEN_SEARCH_SPACE`` exists, this is in dry run mode for + generating search space for the experiment. + If not, there are still two modes, one is nni experiment mode where users + use ``nnictl`` to start an experiment. The other is standalone mode + where users directly run the trial command, this mode chooses the first + one(s) for each LayerChoice and InputChoice. + Parameters + ---------- + model : nn.Module + User's model with search space (e.g., LayerChoice, InputChoice) embedded in it. + """ + ClassicMutator(model) + + +class ClassicMutator(Mutator): + """ + This mutator is to apply the architecture chosen from tuner. + It implements the forward function of LayerChoice and InputChoice, + to only activate the chosen ones. + Parameters + ---------- + model : nn.Module + User's model with search space (e.g., LayerChoice, InputChoice) embedded in it.
+ """ + + def __init__(self, model): + super(ClassicMutator, self).__init__(model) + self._chosen_arch = {} + self._search_space = self._generate_search_space() + if NNI_GEN_SEARCH_SPACE in os.environ: + # dry run for only generating search space + self._dump_search_space(os.environ[NNI_GEN_SEARCH_SPACE]) + sys.exit(0) + + if trial_env_vars.NNI_PLATFORM is None: + logger.warning("This is in standalone mode, the chosen are the first one(s).") + self._chosen_arch = self._standalone_generate_chosen() + else: + # get chosen arch from tuner + self._chosen_arch = nni.get_next_parameter() + if self._chosen_arch is None: + if trial_env_vars.NNI_PLATFORM == "unittest": + # happens if NNI_PLATFORM is intentionally set, e.g., in UT + logger.warning("`NNI_PLATFORM` is set but `param` is None. Falling back to standalone mode.") + self._chosen_arch = self._standalone_generate_chosen() + else: + raise RuntimeError("Chosen architecture is None. This may be a platform error.") + self.reset() + + def _sample_layer_choice(self, mutable, idx, value, search_space_item): + """ + Convert layer choice to tensor representation. + Parameters + ---------- + mutable : Mutable + idx : int + Number `idx` of list will be selected. + value : str + The verbose representation of the selected value. + search_space_item : list + The list for corresponding search space. + """ + # doesn't support multihot for layer choice yet + assert 0 <= idx < len(mutable) and search_space_item[idx] == value, \ + "Index '{}' in search space '{}' is not '{}'".format(idx, search_space_item, value) + mask = tf.one_hot(idx, len(mutable)) + return tf.cast(tf.reshape(mask, [-1]), tf.bool) + + def _sample_input_choice(self, mutable, idx, value, search_space_item): + """ + Convert input choice to tensor representation. + Parameters + ---------- + mutable : Mutable + idx : int + Number `idx` of list will be selected. + value : str + The verbose representation of the selected value. 
+ search_space_item : list + The list for corresponding search space. + """ + candidate_repr = search_space_item["candidates"] + multihot_list = [False] * mutable.n_candidates + for i, v in zip(idx, value): + assert 0 <= i < mutable.n_candidates and candidate_repr[i] == v, \ + "Index '{}' in search space '{}' is not '{}'".format(i, candidate_repr, v) + assert not multihot_list[i], "'{}' is selected twice in '{}', which is not allowed.".format(i, idx) + multihot_list[i] = True + return tf.cast(multihot_list, tf.bool) # pylint: disable=not-callable + + def sample_search(self): + """ + See :meth:`sample_final`. + """ + return self.sample_final() + + def sample_final(self): + """ + Convert the chosen arch and apply it on model. + """ + assert set(self._chosen_arch.keys()) == set(self._search_space.keys()), \ + "Unmatched keys, expected keys '{}' from search space, found '{}'.".format(self._search_space.keys(), + self._chosen_arch.keys()) + result = dict() + for mutable in self.mutables: + if isinstance(mutable, (LayerChoice, InputChoice)): + assert mutable.key in self._chosen_arch, \ + "Expected '{}' in chosen arch, but not found.".format(mutable.key) + data = self._chosen_arch[mutable.key] + assert isinstance(data, dict) and "_value" in data and "_idx" in data, \ + "'{}' is not a valid choice.".format(data) + if isinstance(mutable, LayerChoice): + result[mutable.key] = self._sample_layer_choice(mutable, data["_idx"], data["_value"], + self._search_space[mutable.key]["_value"]) + elif isinstance(mutable, InputChoice): + result[mutable.key] = self._sample_input_choice(mutable, data["_idx"], data["_value"], + self._search_space[mutable.key]["_value"]) + elif isinstance(mutable, MutableScope): + logger.info("Mutable scope '%s' is skipped during parsing choices.", mutable.key) + else: + raise TypeError("Unsupported mutable type: '%s'." 
% type(mutable)) + return result + + def _standalone_generate_chosen(self): + """ + Generate the chosen architecture for standalone mode, + i.e., choose the first one(s) for LayerChoice and InputChoice. + :: + { key_name: {"_value": "conv1", + "_idx": 0} } + { key_name: {"_value": ["in1"], + "_idx": [0]} } + Returns + ------- + dict + the chosen architecture + """ + chosen_arch = {} + for key, val in self._search_space.items(): + if val["_type"] == LAYER_CHOICE: + choices = val["_value"] + chosen_arch[key] = {"_value": choices[0], "_idx": 0} + elif val["_type"] == INPUT_CHOICE: + choices = val["_value"]["candidates"] + n_chosen = val["_value"]["n_chosen"] + if n_chosen is None: + n_chosen = len(choices) + chosen_arch[key] = {"_value": choices[:n_chosen], "_idx": list(range(n_chosen))} + else: + raise ValueError("Unknown key '%s' and value '%s'." % (key, val)) + return chosen_arch + + def _generate_search_space(self): + """ + Generate search space from mutables. + Here is the search space format: + :: + { key_name: {"_type": "layer_choice", + "_value": ["conv1", "conv2"]} } + { key_name: {"_type": "input_choice", + "_value": {"candidates": ["in1", "in2"], + "n_chosen": 1}} } + Returns + ------- + dict + the generated search space + """ + search_space = {} + for mutable in self.mutables: + # for now we only generate flattened search space + if isinstance(mutable, LayerChoice): + key = mutable.key + val = mutable.names + search_space[key] = {"_type": LAYER_CHOICE, "_value": val} + elif isinstance(mutable, InputChoice): + key = mutable.key + search_space[key] = {"_type": INPUT_CHOICE, + "_value": {"candidates": mutable.choose_from, + "n_chosen": mutable.n_chosen}} + elif isinstance(mutable, MutableScope): + logger.info("Mutable scope '%s' is skipped during generating search space.", mutable.key) + else: + raise TypeError("Unsupported mutable type: '%s'." 
% type(mutable)) + return search_space + + def _dump_search_space(self, file_path): + with open(file_path, "w") as ss_file: + json.dump(self._search_space, ss_file, sort_keys=True, indent=2) diff --git a/src/sdk/pynni/nni/nas/tensorflow/mutables.py b/src/sdk/pynni/nni/nas/tensorflow/mutables.py index 1665112732..b83b6f6325 100644 --- a/src/sdk/pynni/nni/nas/tensorflow/mutables.py +++ b/src/sdk/pynni/nni/nas/tensorflow/mutables.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import logging +from collections import OrderedDict from tensorflow.keras import Model @@ -77,6 +78,18 @@ def __call__(self, *args, **kwargs): class LayerChoice(Mutable): def __init__(self, op_candidates, reduction='sum', return_mask=False, key=None): super().__init__(key=key) + self.names = [] + if isinstance(op_candidates, OrderedDict): + for name, _ in op_candidates.items(): + assert name not in ["length", "reduction", "return_mask", "_key", "key", "names"], \ + "Please don't use a reserved name '{}' for your module.".format(name) + self.names.append(name) + elif isinstance(op_candidates, list): + for i, _ in enumerate(op_candidates): + self.names.append(str(i)) + else: + raise TypeError("Unsupported op_candidates type: {}".format(type(op_candidates))) + self.length = len(op_candidates) self.choices = op_candidates self.reduction = reduction diff --git a/src/sdk/pynni/nni/ppo_tuner/distri.py b/src/sdk/pynni/nni/ppo_tuner/distri.py index 9af2e1add9..8a2a5ed20c 100644 --- a/src/sdk/pynni/nni/ppo_tuner/distri.py +++ b/src/sdk/pynni/nni/ppo_tuner/distri.py @@ -5,7 +5,8 @@ functions for sampling from hidden state """ -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() from .util import fc diff --git a/src/sdk/pynni/nni/ppo_tuner/model.py b/src/sdk/pynni/nni/ppo_tuner/model.py index db6c6bd571..c6a8479c6d 100644 --- a/src/sdk/pynni/nni/ppo_tuner/model.py +++ b/src/sdk/pynni/nni/ppo_tuner/model.py @@ -5,7 +5,8 @@ the main model of policy/value network """ 
-import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() from .util import initialize, get_session diff --git a/src/sdk/pynni/nni/ppo_tuner/policy.py b/src/sdk/pynni/nni/ppo_tuner/policy.py index fd9031cad0..a35e514eae 100644 --- a/src/sdk/pynni/nni/ppo_tuner/policy.py +++ b/src/sdk/pynni/nni/ppo_tuner/policy.py @@ -5,7 +5,8 @@ build policy/value network from model """ -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() from .distri import CategoricalPdType from .util import lstm_model, fc, observation_placeholder, adjust_shape diff --git a/src/sdk/pynni/nni/ppo_tuner/util.py b/src/sdk/pynni/nni/ppo_tuner/util.py index 00d6fff85a..605292de40 100644 --- a/src/sdk/pynni/nni/ppo_tuner/util.py +++ b/src/sdk/pynni/nni/ppo_tuner/util.py @@ -9,7 +9,8 @@ import random import multiprocessing import numpy as np -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() from gym.spaces import Discrete, Box, MultiDiscrete def set_global_seeds(i): diff --git a/test/config/examples/classic-nas-pytorch.yml b/test/config/examples/classic-nas-pytorch.yml new file mode 100644 index 0000000000..7643b48acf --- /dev/null +++ b/test/config/examples/classic-nas-pytorch.yml @@ -0,0 +1,21 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 10m +maxTrialNum: 1 +trialConcurrency: 1 +searchSpacePath: nni-nas-search-space.json + +tuner: + builtinTunerName: PPOTuner + classArgs: + optimize_mode: maximize +trial: + command: python3 mnist.py --epochs 1 + codeDir: ../../../examples/nas/classic_nas + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local \ No newline at end of file diff --git a/test/config/examples/classic-nas-tf2.yml b/test/config/examples/classic-nas-tf2.yml new file mode 100644 index 0000000000..708d655857 --- /dev/null +++ b/test/config/examples/classic-nas-tf2.yml @@ -0,0 +1,21 @@ +authorName: nni +experimentName: 
default_test +maxExecDuration: 10m +maxTrialNum: 1 +trialConcurrency: 1 +searchSpacePath: nni-nas-search-space-tf2.json + +tuner: + builtinTunerName: PPOTuner + classArgs: + optimize_mode: maximize +trial: + command: python3 train.py --epochs 1 + codeDir: ../../../examples/nas/classic_nas-tf + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local \ No newline at end of file diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index c6c5b44fa3..1f5ccc35d8 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -72,6 +72,23 @@ testCases: - name: nested-ss configFile: test/config/examples/mnist-nested-search-space.yml +- name: classic-nas-gen-ss + configFile: test/config/examples/classic-nas-pytorch.yml + launchCommand: nnictl ss_gen --trial_command="python3 mnist.py --epochs 1" --trial_dir=../examples/nas/classic_nas --file=config/examples/nni-nas-search-space.json + stopCommand: + experimentStatusCheck: False + +- name: classic-nas-install-ppo + configFile: test/config/examples/classic-nas-pytorch.yml + launchCommand: nnictl package install --name=PPOTuner + stopCommand: + experimentStatusCheck: False + +- name: classic-nas-pytorch + configFile: test/config/examples/classic-nas-pytorch.yml + # remove search space file + stopCommand: nnictl stop + onExitCommand: python3 -c 'import os; os.remove("config/examples/nni-nas-search-space.json")' ######################################################################### # nni features test diff --git a/test/config/integration_tests_tf2.yml b/test/config/integration_tests_tf2.yml index 1c3e375990..797c9c114c 100644 --- a/test/config/integration_tests_tf2.yml +++ b/test/config/integration_tests_tf2.yml @@ -51,6 +51,24 @@ testCases: command: python3 main.py --epochs 1 --batches 1 gpuNum: 0 +- name: classic-nas-gen-ss + configFile: test/config/examples/classic-nas-tf2.yml + launchCommand: nnictl ss_gen 
--trial_command="python3 train.py --epochs 1" --trial_dir=../examples/nas/classic_nas-tf --file=config/examples/nni-nas-search-space-tf2.json + stopCommand: + experimentStatusCheck: False + +- name: classic-nas-install-ppo + configFile: test/config/examples/classic-nas-tf2.yml + launchCommand: nnictl package install --name=PPOTuner + stopCommand: + experimentStatusCheck: False + +- name: classic-nas-tensorflow2 + configFile: test/config/examples/classic-nas-tf2.yml + # remove search space file + stopCommand: nnictl stop + onExitCommand: python3 -c 'import os; os.remove("config/examples/nni-nas-search-space-tf2.json")' + ######################################################################### # nni features test ######################################################################### diff --git a/test/nni_test/nnitest/run_tests.py b/test/nni_test/nnitest/run_tests.py index d817eb4465..542a4a932d 100644 --- a/test/nni_test/nnitest/run_tests.py +++ b/test/nni_test/nnitest/run_tests.py @@ -76,6 +76,10 @@ def run_test_case(test_case_config, it_config, args): print('Stop command:', stop_command, flush=True) if stop_command: subprocess.run(shlex.split(stop_command)) + exit_command = get_command(test_case_config, 'onExitCommand') + print('Exit command:', exit_command, flush=True) + if exit_command: + subprocess.run(shlex.split(exit_command), check=True) # remove tmp config file if os.path.exists(new_config_file): os.remove(new_config_file) diff --git a/test/scripts/nas.sh b/test/scripts/nas.sh index ac23b57235..12a07012a7 100644 --- a/test/scripts/nas.sh +++ b/test/scripts/nas.sh @@ -6,7 +6,7 @@ echo "" echo "===========================Testing: NAS===========================" EXAMPLE_DIR=${CWD}/../examples/nas -echo "testing classic nas..." +echo "testing nnictl ss_gen (classic nas)..." 
cd $EXAMPLE_DIR/classic_nas SEARCH_SPACE_JSON=nni_auto_gen_search_space.json if [ -f $SEARCH_SPACE_JSON ]; then From 258b4011731d462e8c6847461c306287709faa1f Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sun, 28 Jun 2020 16:31:30 +0800 Subject: [PATCH 16/17] Fix tensorflow.examples.tutorial import (#2602) --- test/pipelines/pipelines-it-local.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index 00fb8e11e5..c66a9b7186 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -14,7 +14,7 @@ jobs: python3 -m pip install torchvision==0.4.1 --user python3 -m pip install torch==1.3.1 --user python3 -m pip install keras==2.1.6 --user - python3 -m pip install tensorflow-gpu==1.15 --user + python3 -m pip install tensorflow-gpu==1.15.2 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB From 885a25804672b6af6eb5a6f56805edcb376658f6 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Sun, 28 Jun 2020 23:20:18 +0800 Subject: [PATCH 17/17] Fix local pipeline (#2604) --- src/sdk/pynni/nni/ppo_tuner/requirements.txt | 1 - test/pipelines/pipelines-it-local-tf2.yml | 2 +- test/pipelines/pipelines-it-local-windows.yml | 2 +- test/pipelines/pipelines-it-local.yml | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sdk/pynni/nni/ppo_tuner/requirements.txt b/src/sdk/pynni/nni/ppo_tuner/requirements.txt index 138951469b..a58dfb12f2 100644 --- a/src/sdk/pynni/nni/ppo_tuner/requirements.txt +++ b/src/sdk/pynni/nni/ppo_tuner/requirements.txt @@ -1,3 +1,2 @@ enum34 gym -tensorflow \ No newline at end of file diff --git a/test/pipelines/pipelines-it-local-tf2.yml b/test/pipelines/pipelines-it-local-tf2.yml index 95421a8ce4..b7eb5a6c7c 100644 
--- a/test/pipelines/pipelines-it-local-tf2.yml +++ b/test/pipelines/pipelines-it-local-tf2.yml @@ -12,7 +12,7 @@ jobs: set -e python3 -m pip install scikit-learn==0.20.0 --user python3 -m pip install torch==1.3.1 torchvision==0.4.1 -f https://download.pytorch.org/whl/torch_stable.html --user - python3 -m pip install tensorflow-gpu==2.2.0 --user + python3 -m pip install tensorflow-gpu==2.2.0 tensorflow-estimator==2.2.0 --force --user python3 -m pip install keras==2.4.2 --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC diff --git a/test/pipelines/pipelines-it-local-windows.yml b/test/pipelines/pipelines-it-local-windows.yml index cce7616353..bb54e9421a 100644 --- a/test/pipelines/pipelines-it-local-windows.yml +++ b/test/pipelines/pipelines-it-local-windows.yml @@ -10,7 +10,7 @@ jobs: python -m pip install scikit-learn==0.20.0 --user python -m pip install keras==2.1.6 --user python -m pip install torchvision===0.4.1 torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html --user - python -m pip install tensorflow-gpu==1.15.2 --user + python -m pip install tensorflow-gpu==1.15.2 tensorflow-estimator==1.15.1 --force --user displayName: 'Install dependencies for integration tests' - script: | cd test diff --git a/test/pipelines/pipelines-it-local.yml b/test/pipelines/pipelines-it-local.yml index c66a9b7186..30d86ddb16 100644 --- a/test/pipelines/pipelines-it-local.yml +++ b/test/pipelines/pipelines-it-local.yml @@ -14,7 +14,7 @@ jobs: python3 -m pip install torchvision==0.4.1 --user python3 -m pip install torch==1.3.1 --user python3 -m pip install keras==2.1.6 --user - python3 -m pip install tensorflow-gpu==1.15.2 --user + python3 -m pip install tensorflow-gpu==1.15.2 tensorflow-estimator==1.15.1 --force --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB