diff --git a/README.md b/README.md
index e9c56dea09..afc778fddc 100644
--- a/README.md
+++ b/README.md
@@ -27,8 +27,10 @@ The tool dispatches and runs trial jobs generated by tuning algorithms to search
+
+
- Supported Frameworks
+ Frameworks & Libraries
@@ -42,26 +44,52 @@ The tool dispatches and runs trial jobs generated by tuning algorithms to search
+
+
## **Who should consider using NNI**
* Those who want to try different AutoML algorithms in their training code (model) at their local machine.
@@ -235,60 +297,69 @@ Maybe you want to read:
* [NNI overview](docs/en_US/Overview.md)
* [Quick start](docs/en_US/Tutorial/QuickStart.md)
-* [Contributing](docs/en_US/Tutorial/Contributing.md)
-* [Examples](docs/en_US/examples.rst)
-* [References](docs/en_US/reference.rst)
* [WebUI tutorial](docs/en_US/Tutorial/WebUI.md)
+* [Contributing](docs/en_US/Tutorial/Contributing.md)
+
## **How to**
* [Install NNI](docs/en_US/Tutorial/Installation.md)
* [Use command line tool nnictl](docs/en_US/Tutorial/Nnictl.md)
-* [Use NNIBoard](docs/en_US/Tutorial/WebUI.md)
-* [How to define search space](docs/en_US/Tutorial/SearchSpaceSpec.md)
-* [How to define a trial](docs/en_US/TrialExample/Trials.md)
-* [How to choose tuner/search-algorithm](docs/en_US/Tuner/BuiltinTuner.md)
+* [Define a trial](docs/en_US/TrialExample/Trials.md)
* [Config an experiment](docs/en_US/Tutorial/ExperimentConfig.md)
-* [How to use annotation](docs/en_US/TrialExample/Trials.md#nni-python-annotation)
+* [Define search space](docs/en_US/Tutorial/SearchSpaceSpec.md)
+* [Choose tuner/search-algorithm](docs/en_US/Tuner/BuiltinTuner.md)
+* [Use annotation](docs/en_US/TrialExample/Trials.md#nni-python-annotation)
+* [Use NNIBoard](docs/en_US/Tutorial/WebUI.md)
+
## **Tutorials**
+* [Run an experiment on local (with multiple GPUs)](docs/en_US/TrainingService/LocalMode.md)
* [Run an experiment on OpenPAI](docs/en_US/TrainingService/PaiMode.md)
* [Run an experiment on Kubeflow](docs/en_US/TrainingService/KubeflowMode.md)
-* [Run an experiment on local (with multiple GPUs)](docs/en_US/TrainingService/LocalMode.md)
* [Run an experiment on multiple machines](docs/en_US/TrainingService/RemoteMachineMode.md)
* [Try different tuners](docs/en_US/Tuner/BuiltinTuner.md)
* [Try different assessors](docs/en_US/Assessor/BuiltinAssessor.md)
* [Implement a customized tuner](docs/en_US/Tuner/CustomizeTuner.md)
* [Implement a customized assessor](docs/en_US/Assessor/CustomizeAssessor.md)
+* [Implement TrainingService in NNI](docs/en_US/TrainingService/HowToImplementTrainingService.md)
* [Use Genetic Algorithm to find good model architectures for Reading Comprehension task](docs/en_US/TrialExample/SquadEvolutionExamples.md)
+* [Advanced Neural Architecture Search](docs/en_US/AdvancedFeature/AdvancedNas.md)
+
## **Contribute**
This project welcomes contributions and there are many ways in which you can participate in the project, for example:
-* Review [source code changes](https://github.com/microsoft/nni/pulls)
-* Review the [documentation](https://github.com/microsoft/nni/tree/master/docs) and make pull requests for anything from typos to new content
+* Open [bug reports](https://github.com/microsoft/nni/issues/new/choose).
+* Request a [new feature](https://github.com/microsoft/nni/issues/new/choose).
+* Suggest changes or ask questions on the [How to Debug](docs/en_US/Tutorial/HowToDebug.md) guidance document.
* Find the issues tagged with ['good first issue'](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) or ['help-wanted'](https://github.com/microsoft/nni/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22), these are simple and easy to start , we recommend new contributors to start with.
-Before providing your hacks, there are a few simple guidelines that you need to follow:
+
+Before providing your hacks, you can review the [Contributing Instruction](docs/en_US/Tutorial/Contributing.md) to get more information. In addition, we also provide you with the following documents:
+* [NNI developer environment installation tutorial](docs/en_US/Tutorial/SetupNniDeveloperEnvironment.md)
* [How to debug](docs/en_US/Tutorial/HowToDebug.md)
-* [Code Styles & Naming Conventions](docs/en_US/Tutorial/Contributing.md)
-* How to Set up [NNI developer environment](docs/en_US/Tutorial/SetupNniDeveloperEnvironment.md)
-* Review the [Contributing Instruction](docs/en_US/Tutorial/Contributing.md) and get familiar with the NNI Code Contribution Guideline
+* [Customize Your Own Advisor](docs/en_US/Tuner/CustomizeAdvisor.md)
+* [Customize Your Own Tuner](docs/en_US/Tuner/CustomizeTuner.md)
+* [Implement customized TrainingService](docs/en_US/TrainingService/HowToImplementTrainingService.md)
+
## **External Repositories**
Now we have some external usage examples run in NNI from our contributors. Thanks our lovely contributors. And welcome more and more people to join us!
* Run [ENAS](examples/tuners/enas_nni/README.md) in NNI
* Run [Neural Network Architecture Search](examples/trials/nas_cifar10/README.md) in NNI
+* [Automatic Feature Engineering](examples/trials/auto-feature-engineering/README.md) in NNI
## **Feedback**
-* Open [bug reports](https://github.com/microsoft/nni/issues/new/choose).
-* Request a [new feature](https://github.com/microsoft/nni/issues/new/choose).
* Discuss on the NNI [Gitter](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) in NNI
-* Ask a question with NNI tags on [Stack Overflow](https://stackoverflow.com/questions/tagged/nni?sort=Newest&edited=true)or [file an issue](https://github.com/microsoft/nni/issues/new/choose)on GitHub.
-* We are in construction of the instruction for [How to Debug](docs/en_US/Tutorial/HowToDebug.md), you are also welcome to contribute questions or suggestions on this area.
+* Ask a question with NNI tags on [Stack Overflow](https://stackoverflow.com/questions/tagged/nni?sort=Newest&edited=true)
+* [File an issue](https://github.com/microsoft/nni/issues/new/choose) on GitHub.
+
+
## **License**
The entire codebase is under [MIT license](LICENSE)
+
diff --git a/README_zh_CN.md b/README_zh_CN.md
index cdd5bf7cff..75f1181a37 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -4,7 +4,7 @@
* * *
-[![MIT 许可证](https://img.shields.io/badge/license-MIT-brightgreen.svg)](LICENSE) [![生成状态](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/Microsoft.nni)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=6) [![问题](https://img.shields.io/github/issues-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen) [![Bug](https://img.shields.io/github/issues/Microsoft/nni/bug.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3Abug) [![拉取请求](https://img.shields.io/github/issues-pr-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/pulls?q=is%3Apr+is%3Aopen) [![版本](https://img.shields.io/github/release/Microsoft/nni.svg)](https://github.com/Microsoft/nni/releases) [![进入 https://gitter.im/Microsoft/nni 聊天室提问](https://badges.gitter.im/Microsoft/nni.svg)](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![文档状态](https://readthedocs.org/projects/nni/badge/?version=latest)](https://nni.readthedocs.io/en/latest/?badge=latest)
+[![MIT 许可证](https://img.shields.io/badge/license-MIT-brightgreen.svg)](LICENSE) [![生成状态](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/Microsoft.nni)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=6) [![问题](https://img.shields.io/github/issues-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen) [![Bug](https://img.shields.io/github/issues/Microsoft/nni/bug.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3Abug) [![拉取请求](https://img.shields.io/github/issues-pr-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/pulls?q=is%3Apr+is%3Aopen) [![版本](https://img.shields.io/github/release/Microsoft/nni.svg)](https://github.com/Microsoft/nni/releases) [![进入 https://gitter.im/Microsoft/nni 聊天室提问](https://badges.gitter.im/Microsoft/nni.svg)](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![文档状态](https://readthedocs.org/projects/nni/badge/?version=latest)](https://nni.readthedocs.io/zh/latest/?badge=latest)
[English](README.md)
@@ -47,40 +47,40 @@ NNI (Neural Network Intelligence) 是自动机器学习(AutoML)的工具包
@@ -226,34 +226,34 @@ You can use these commands to get more information about the experiment
点击阅读:
* [NNI 概述](docs/zh_CN/Overview.md)
-* [快速入门](docs/en_US/Tutorial/QuickStart.md)
-* [贡献](docs/en_US/Tutorial/Contributing.md)
-* [示例](docs/en_US/examples.rst)
-* [参考](docs/en_US/reference.rst)
-* [Web 界面教程](docs/en_US/Tutorial/WebUI.md)
+* [快速入门](docs/zh_CN/Tutorial/QuickStart.md)
+* [贡献](docs/zh_CN/Tutorial/Contributing.md)
+* [示例](docs/zh_CN/examples.rst)
+* [参考](docs/zh_CN/reference.rst)
+* [Web 界面教程](docs/zh_CN/Tutorial/WebUI.md)
## **入门**
-* [安装 NNI](docs/en_US/Tutorial/Installation.md)
-* [使用命令行工具 nnictl](docs/en_US/Tutorial/Nnictl.md)
-* [使用 NNIBoard](docs/en_US/Tutorial/WebUI.md)
-* [如何定义搜索空间](docs/en_US/Tutorial/SearchSpaceSpec.md)
-* [如何实现 Trial 代码](docs/en_US/TrialExample/Trials.md)
-* [如何选择 Tuner、搜索算法](docs/en_US/Tuner/BuiltinTuner.md)
-* [配置 Experiment](docs/en_US/Tutorial/ExperimentConfig.md)
-* [如何使用 Annotation](docs/en_US/TrialExample/Trials.md#nni-python-annotation)
+* [安装 NNI](docs/zh_CN/Tutorial/Installation.md)
+* [使用命令行工具 nnictl](docs/zh_CN/Tutorial/Nnictl.md)
+* [使用 NNIBoard](docs/zh_CN/Tutorial/WebUI.md)
+* [如何定义搜索空间](docs/zh_CN/Tutorial/SearchSpaceSpec.md)
+* [如何实现 Trial 代码](docs/zh_CN/TrialExample/Trials.md)
+* [如何选择 Tuner、搜索算法](docs/zh_CN/Tuner/BuiltinTuner.md)
+* [配置 Experiment](docs/zh_CN/Tutorial/ExperimentConfig.md)
+* [如何使用 Annotation](docs/zh_CN/TrialExample/Trials.md#nni-python-annotation)
## **教程**
-* [在 OpenPAI 上运行 Experiment](docs/en_US/TrainingService/PaiMode.md)
-* [在 Kubeflow 上运行 Experiment](docs/en_US/TrainingService/KubeflowMode.md)
-* [在本机运行 Experiment (支持多 GPU 卡)](docs/en_US/TrainingService/LocalMode.md)
-* [在多机上运行 Experiment](docs/en_US/TrainingService/RemoteMachineMode.md)
-* [尝试不同的 Tuner](docs/en_US/Tuner/BuiltinTuner.md)
-* [尝试不同的 Assessor](docs/en_US/Assessor/BuiltinAssessor.md)
-* [实现自定义 Tuner](docs/en_US/Tuner/CustomizeTuner.md)
-* [实现自定义 Assessor](docs/en_US/Assessor/CustomizeAssessor.md)
-* [使用进化算法为阅读理解任务找到好模型](docs/en_US/TrialExample/SquadEvolutionExamples.md)
+* [在 OpenPAI 上运行 Experiment](docs/zh_CN/TrainingService/PaiMode.md)
+* [在 Kubeflow 上运行 Experiment](docs/zh_CN/TrainingService/KubeflowMode.md)
+* [在本机运行 Experiment (支持多 GPU 卡)](docs/zh_CN/TrainingService/LocalMode.md)
+* [在多机上运行 Experiment](docs/zh_CN/TrainingService/RemoteMachineMode.md)
+* [尝试不同的 Tuner](docs/zh_CN/Tuner/BuiltinTuner.md)
+* [尝试不同的 Assessor](docs/zh_CN/Assessor/BuiltinAssessor.md)
+* [实现自定义 Tuner](docs/zh_CN/Tuner/CustomizeTuner.md)
+* [实现自定义 Assessor](docs/zh_CN/Assessor/CustomizeAssessor.md)
+* [使用进化算法为阅读理解任务找到好模型](docs/zh_CN/TrialExample/SquadEvolutionExamples.md)
## **贡献**
@@ -265,10 +265,10 @@ You can use these commands to get more information about the experiment
在提交代码前,需要遵循以下的简单准则:
-* [如何调试](docs/en_US/Tutorial/HowToDebug.md)
-* [代码风格和命名约定](docs/en_US/Tutorial/Contributing.md)
+* [如何调试](docs/zh_CN/Tutorial/HowToDebug.md)
+* [代码风格和命名约定](docs/zh_CN/Tutorial/Contributing.md)
* 如何设置 [NNI 开发环境](docs/zh_CN/Tutorial/SetupNniDeveloperEnvironment.md)
-* 查看[贡献说明](docs/en_US/Tutorial/Contributing.md)并熟悉 NNI 的代码贡献指南
+* 查看[贡献说明](docs/zh_CN/Tutorial/Contributing.md)并熟悉 NNI 的代码贡献指南
## **外部代码库**
diff --git a/deployment/docker/Dockerfile b/deployment/docker/Dockerfile
index 4ea2325a10..e4c9d50f28 100644
--- a/deployment/docker/Dockerfile
+++ b/deployment/docker/Dockerfile
@@ -49,7 +49,7 @@ RUN DEBIAN_FRONTEND=noninteractive && \
#
# update pip
#
-RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install --upgrade pip setuptools==39.1.0
# numpy 1.14.3 scipy 1.1.0
RUN python3 -m pip --no-cache-dir install \
diff --git a/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md b/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md
index 1c53205d1f..81ff43535c 100644
--- a/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md
+++ b/docs/en_US/AdvancedFeature/GeneralNasInterfaces.md
@@ -120,7 +120,7 @@ Here, `nni.training_update` is to do some update on the full graph. In enas_mode
**\*oneshot_mode\***: following the training approach in [this paper][6]. Different from enas_mode which trains the full graph by training large numbers of subgraphs, in oneshot_mode the full graph is built and dropout is added to candidate inputs and also added to candidate ops' outputs. Then this full graph is trained like other DL models. [Detailed Description](#OneshotMode). (currently only supported on tensorflow).
-To use oneshot_mode, you should add one more field in the `trial` config as shown below. In this mode, no need to specify tuner in the config file as it does not need tuner. (Note that you still need to specify a tuner (any tuner) in the config file for now.) Also, no need to add `nni.training_update` in this mode, because no special processing (or update) is needed during training.
+To use oneshot_mode, you should add one more field in the `trial` config as shown below. In this mode, though there is no need to use tuner, you still need to specify a tuner (any tuner) in the config file for now. Also, no need to add `nni.training_update` in this mode, because no special processing (or update) is needed during training.
```diff
trial:
command: your command to run the trial
@@ -132,7 +132,7 @@ trial:
**\*darts_mode\***: following the training approach in [this paper][3]. It is similar to oneshot_mode. There are two differences, one is that darts_mode only add architecture weights to the outputs of candidate ops, the other is that it trains model weights and architecture weights in an interleaved manner. [Detailed Description](#DartsMode).
-To use darts_mode, you should add one more field in the `trial` config as shown below. In this mode, also no need to specify tuner in the config file as it does not need tuner. (Note that you still need to specify a tuner (any tuner) in the config file for now.)
+To use darts_mode, you should add one more field in the `trial` config as shown below. In this mode, though there is no need to use tuner, you still need to specify a tuner (any tuner) in the config file for now.
```diff
trial:
command: your command to run the trial
@@ -156,9 +156,9 @@ for _ in range(num):
### enas_mode
-In enas_mode, the compiled trial code builds the full graph (rather than subgraph), it receives a chosen architecture and training this architecture on the full graph for a mini-batch, then request another chosen architecture. It is supported by [NNI multi-phase](./multiPhase.md).
+In enas_mode, the compiled trial code builds the full graph (rather than subgraph), it receives a chosen architecture and training this architecture on the full graph for a mini-batch, then request another chosen architecture. It is supported by [NNI multi-phase](./MultiPhase.md).
-Specifically, for trials using tensorflow, we create and use tensorflow variable as signals, and tensorflow conditional functions to control the search space (full-graph) to be more flexible, which means it can be changed into different sub-graphs (multiple times) depending on these signals. [Here]() is an example for enas_mode.
+Specifically, for trials using tensorflow, we create and use tensorflow variable as signals, and tensorflow conditional functions to control the search space (full-graph) to be more flexible, which means it can be changed into different sub-graphs (multiple times) depending on these signals. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/enas_mode) is an example for enas_mode.
@@ -168,7 +168,7 @@ Below is the figure to show where dropout is added to the full graph for one lay
![](../../img/oneshot_mode.png)
-As suggested in the [paper][6], a dropout method is implemented to the inputs for every layer. The dropout rate is set to r^(1/k), where 0 < r < 1 is a hyper-parameter of the model (default to be 0.01) and k is number of optional inputs for a specific layer. The higher the fan-in, the more likely each possible input is to be dropped out. However, the probability of dropping out all optional_inputs of a layer is kept constant regardless of its fan-in. Suppose r = 0.05. If a layer has k = 2 optional_inputs then each one will independently be dropped out with probability 0.051/2 ≈ 0.22 and will be retained with probability 0.78. If a layer has k = 7 optional_inputs then each one will independently be dropped out with probability 0.051/7 ≈ 0.65 and will be retained with probability 0.35. In both cases, the probability of dropping out all of the layer's optional_inputs is 5%. The outputs of candidate ops are dropped out through the same way. [Here]() is an example for oneshot_mode.
+As suggested in the [paper][6], a dropout method is implemented to the inputs for every layer. The dropout rate is set to r^(1/k), where 0 < r < 1 is a hyper-parameter of the model (default to be 0.01) and k is number of optional inputs for a specific layer. The higher the fan-in, the more likely each possible input is to be dropped out. However, the probability of dropping out all optional_inputs of a layer is kept constant regardless of its fan-in. Suppose r = 0.05. If a layer has k = 2 optional_inputs then each one will independently be dropped out with probability 0.051/2 ≈ 0.22 and will be retained with probability 0.78. If a layer has k = 7 optional_inputs then each one will independently be dropped out with probability 0.051/7 ≈ 0.65 and will be retained with probability 0.35. In both cases, the probability of dropping out all of the layer's optional_inputs is 5%. The outputs of candidate ops are dropped out through the same way. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/oneshot_mode) is an example for oneshot_mode.
@@ -178,7 +178,7 @@ Below is the figure to show where architecture weights are added to the full gra
![](../../img/darts_mode.png)
-In `nni.training_update`, tensorflow MomentumOptimizer is used to train the architecture weights based on the pass `loss` and `feed_dict`. [Here]() is an example for darts_mode.
+In `nni.training_update`, tensorflow MomentumOptimizer is used to train the architecture weights based on the pass `loss` and `feed_dict`. [Here](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/darts_mode) is an example for darts_mode.
### [__TODO__] Multiple trial jobs for One-Shot NAS
diff --git a/docs/en_US/CommunitySharings/HpoComparision.md b/docs/en_US/CommunitySharings/HpoComparision.md
index 61f384c437..c0a92f4798 100644
--- a/docs/en_US/CommunitySharings/HpoComparision.md
+++ b/docs/en_US/CommunitySharings/HpoComparision.md
@@ -1,7 +1,7 @@
-# Hyperparameter Optimization Comparison
+# Hyper Parameter Optimization Comparison
*Posted by Anonymous Author*
-Comparison of Hyperparameter Optimization algorithms on several problems.
+Comparison of Hyperparameter Optimization (HPO) algorithms on several problems.
Hyperparameter Optimization algorithms are list below:
diff --git a/docs/en_US/SupportedFramework_Library.md b/docs/en_US/SupportedFramework_Library.md
new file mode 100644
index 0000000000..10a124f603
--- /dev/null
+++ b/docs/en_US/SupportedFramework_Library.md
@@ -0,0 +1,48 @@
+# Framework and Library Supports
+With the built-in Python API, NNI naturally supports hyper parameter tuning and neural network search for all the AI frameworks and libraries that support Python models (`version >= 3.5`). NNI has also provided a set of examples and tutorials for some of the popular scenarios to make the jump start easier.
+
+## Supported AI Frameworks
+
+* [PyTorch] https://github.com/pytorch/pytorch
+
+* [MXNet] https://github.com/apache/incubator-mxnet
+* [Caffe2] https://github.com/facebookarchive/caffe2
+* [CNTK (Python language)] https://github.com/microsoft/CNTK
+* [Spark MLlib] http://spark.apache.org/mllib/
+* [Chainer] https://chainer.org/
+* [Theano] https://pypi.org/project/Theano/
+
+You are encouraged to [contribute more examples](Tutorial/Contributing.md) for other NNI users.
+
+## Supported Library
+NNI also supports all libraries written in Python. Here are some common libraries, including some algorithms based on GBDT: XGBoost, CatBoost and LightGBM.
+* [Scikit-learn] https://scikit-learn.org/stable/
+
+Here is just a small list of libraries supported by NNI. If you are interested in NNI, you can refer to the [tutorial](TrialExample/Trials.md) to complete your own hacks.
+
+
+
+In addition to the above examples, we also welcome more and more users to apply NNI to your own work. If you have any doubts, please refer to [Write a Trial Run on NNI](TrialExample/Trials.md). In particular, if you want to be a contributor of NNI — whether it is the sharing of examples, the writing of Tuners, or otherwise — we are all looking forward to your participation. For more information, please refer to [here](Tutorial/Contributing.md).
diff --git a/docs/en_US/TrainingService/PaiMode.md b/docs/en_US/TrainingService/PaiMode.md
index 4a3543236d..78e7aa7984 100644
--- a/docs/en_US/TrainingService/PaiMode.md
+++ b/docs/en_US/TrainingService/PaiMode.md
@@ -54,7 +54,7 @@ Compared with [LocalMode](LocalMode.md) and [RemoteMachineMode](RemoteMachineMod
* shmMB
* Optional key. Set the shmMB configuration of OpenPAI, it set the shared memory for one task in the task role.
* authFile
- * Optional key, Set the auth file path for private registry while using PAI mode, [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job).
+ * Optional key, Set the auth file path for private registry while using PAI mode, [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job), you can prepare the authFile and simply provide the local path of this file, NNI will upload this file to HDFS for you.
Once complete to fill NNI experiment config file and save (for example, save as exp_pai.yml), then run the following command
```
diff --git a/docs/en_US/TrainingService/SupportTrainingService.md b/docs/en_US/TrainingService/SupportTrainingService.md
new file mode 100644
index 0000000000..50c91173e2
--- /dev/null
+++ b/docs/en_US/TrainingService/SupportTrainingService.md
@@ -0,0 +1,36 @@
+# TrainingService
+
+NNI TrainingService provides the training platform for running NNI trial jobs. NNI supports [local](./LocalMode.md), [remote](./RemoteMachineMode.md), [pai](./PaiMode.md), [kubeflow](./KubeflowMode.md) and [frameworkcontroller](./FrameworkControllerMode.md) built-in training services.
+NNI not only provides a few built-in training service options, but also provides a method for customers to build their own training service easily.
+
+## Built-in TrainingService
+|TrainingService|Brief Introduction|
+|---|---|
+|[__Local__](./LocalMode.md)|NNI supports running an experiment on the local machine, called local mode. Local mode means that NNI will run the trial jobs and the nniManager process on the same machine, and supports the GPU scheduling function for trial jobs.|
+|[__Remote__](./RemoteMachineMode.md)|NNI supports running an experiment on multiple machines through SSH channel, called remote mode. NNI assumes that you have access to those machines, and already setup the environment for running deep learning training code. NNI will submit the trial jobs in remote machine, and schedule a suitable machine with enough GPU resources if specified.|
+|[__Pai__](./PaiMode.md)|NNI supports running an experiment on [OpenPAI](https://github.com/Microsoft/pai) (aka pai), called pai mode. Before starting to use NNI pai mode, you should have an account to access an [OpenPAI](https://github.com/Microsoft/pai) cluster. See [here](https://github.com/Microsoft/pai#how-to-deploy) if you don't have any OpenPAI account and want to deploy an OpenPAI cluster. In pai mode, your trial program will run in pai's container created by Docker.|
+|[__Kubeflow__](./KubeflowMode.md)|NNI supports running experiment on [Kubeflow](https://github.com/kubeflow/kubeflow), called kubeflow mode. Before starting to use NNI kubeflow mode, you should have a Kubernetes cluster, either on-premises or [Azure Kubernetes Service(AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service/), a Ubuntu machine on which [kubeconfig](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/) is setup to connect to your Kubernetes cluster. If you are not familiar with Kubernetes, [here](https://kubernetes.io/docs/tutorials/kubernetes-basics/) is a good start. In kubeflow mode, your trial program will run as Kubeflow job in Kubernetes cluster.|
+|[__FrameworkController__](./FrameworkControllerMode.md)|NNI supports running experiment using [FrameworkController](https://github.com/Microsoft/frameworkcontroller), called frameworkcontroller mode. FrameworkController is built to orchestrate all kinds of applications on Kubernetes, you don't need to install Kubeflow for specific deep learning framework like tf-operator or pytorch-operator. Now you can use FrameworkController as the training service to run NNI experiment.|
+
+## TrainingService Implementation
+
+TrainingService is designed to be easily implemented. We define an abstract class TrainingService as the parent class of all kinds of TrainingService; users just need to inherit the parent class and complete their own child class if they want to implement a customized TrainingService.
+The abstract function in TrainingService is shown below:
+```
+abstract class TrainingService {
+ public abstract listTrialJobs(): Promise;
+ public abstract getTrialJob(trialJobId: string): Promise;
+ public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
+ public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
+ public abstract submitTrialJob(form: JobApplicationForm): Promise;
+ public abstract updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise;
+ public abstract get isMultiPhaseJobSupported(): boolean;
+ public abstract cancelTrialJob(trialJobId: string, isEarlyStopped?: boolean): Promise;
+ public abstract setClusterMetadata(key: string, value: string): Promise;
+ public abstract getClusterMetadata(key: string): Promise;
+ public abstract cleanUp(): Promise;
+ public abstract run(): Promise;
+}
+```
+The parent class of TrainingService has a few abstract functions, users need to inherit the parent class and implement all of these abstract functions.
+For more information about how to write your own TrainingService, please [refer](./HowToImplementTrainingService.md).
diff --git a/docs/en_US/Tuner/SmacTuner.md b/docs/en_US/Tuner/SmacTuner.md
index 8324e33cf1..301e02da65 100644
--- a/docs/en_US/Tuner/SmacTuner.md
+++ b/docs/en_US/Tuner/SmacTuner.md
@@ -5,4 +5,4 @@ SMAC Tuner on NNI
[SMAC](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) is based on Sequential Model-Based Optimization (SMBO). It adapts the most prominent previously used model class (Gaussian stochastic process models) and introduces the model class of random forests to SMBO, in order to handle categorical parameters. The SMAC supported by nni is a wrapper on [the SMAC3 github repo](https://github.com/automl/SMAC3).
-Note that SMAC on nni only supports a subset of the types in [search space spec](../Tutorial/SearchSpaceSpec.md), including `choice`, `randint`, `uniform`, `loguniform`, `quniform(q=1)`.
\ No newline at end of file
+Note that SMAC on nni only supports a subset of the types in [search space spec](../Tutorial/SearchSpaceSpec.md), including `choice`, `randint`, `uniform`, `loguniform`, `quniform`.
\ No newline at end of file
diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/en_US/Tutorial/Nnictl.md
index 75b5d3adba..5b5899e42d 100644
--- a/docs/en_US/Tutorial/Nnictl.md
+++ b/docs/en_US/Tutorial/Nnictl.md
@@ -123,6 +123,7 @@ Debug mode will disable version check function in Trialkeeper.
|------|------|------ |------|
|id| False| |The id of the experiment you want to stop|
|--port, -p| False| |Rest port of the experiment you want to stop|
+ |--all, -a| False| |Stop all experiments|
* Details & Examples
@@ -144,10 +145,10 @@ Debug mode will disable version check function in Trialkeeper.
nnictl stop --port 8080
```
- 4. Users could use 'nnictl stop all' to stop all experiments.
+ 4. Users could use 'nnictl stop --all' to stop all experiments.
```bash
- nnictl stop all
+ nnictl stop --all
```
5. If the id ends with *, nnictl will stop all experiments whose ids matchs the regular.
diff --git a/docs/en_US/Tutorial/SearchSpaceSpec.md b/docs/en_US/Tutorial/SearchSpaceSpec.md
index e2d742510a..b892ea49c9 100644
--- a/docs/en_US/Tutorial/SearchSpaceSpec.md
+++ b/docs/en_US/Tutorial/SearchSpaceSpec.md
@@ -10,11 +10,11 @@ To define a search space, users should define the name of variable, the type of
```yaml
{
- "dropout_rate":{"_type":"uniform","_value":[0.1,0.5]},
- "conv_size":{"_type":"choice","_value":[2,3,5,7]},
- "hidden_size":{"_type":"choice","_value":[124, 512, 1024]},
- "batch_size":{"_type":"choice","_value":[50, 250, 500]},
- "learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]}
+ "dropout_rate": {"_type": "uniform", "_value": [0.1, 0.5]},
+ "conv_size": {"_type": "choice", "_value": [2, 3, 5, 7]},
+ "hidden_size": {"_type": "choice", "_value": [124, 512, 1024]},
+ "batch_size": {"_type": "choice", "_value": [50, 250, 500]},
+ "learning_rate": {"_type": "uniform", "_value": [0.0001, 0.1]}
}
```
@@ -25,55 +25,54 @@ Take the first line as an example. `dropout_rate` is defined as a variable whose
All types of sampling strategies and their parameter are listed here:
-* {"_type":"choice","_value":options}
+* `{"_type": "choice", "_value": options}`
* Which means the variable's value is one of the options. Here 'options' should be a list. Each element of options is a number of string. It could also be a nested sub-search-space, this sub-search-space takes effect only when the corresponding element is chosen. The variables in this sub-search-space could be seen as conditional variables.
- * An simple [example](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/search_space.json) of [nested] search space definition. If an element in the options list is a dict, it is a sub-search-space, and for our built-in tuners you have to add a key '_name' in this dict, which helps you to identify which element is chosen. Accordingly, here is a [sample](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/sample.json) which users can get from nni with nested search space definition. Tuners which support nested search space is as follows:
+ * A simple [example](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/search_space.json) of [nested] search space definition. If an element in the options list is a dict, it is a sub-search-space, and for our built-in tuners you have to add a key `_name` in this dict, which helps you to identify which element is chosen. Accordingly, here is a [sample](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nested-search-space/sample.json) which users can get from nni with nested search space definition. Tuners which support nested search space are as follows:
- Random Search
- TPE
- Anneal
- Evolution
-* {"_type":"randint","_value":[lower, upper]}
+* `{"_type": "randint", "_value": [lower, upper]}`
+ * Choosing a random integer from `lower` (inclusive) to `upper` (exclusive).
+ * Note: Different tuners may interpret `randint` differently. Some (e.g., TPE, GridSearch) treat integers from lower
+ to upper as unordered ones, while others respect the ordering (e.g., SMAC). If you want all the tuners to respect
+ the ordering, please use `quniform` with `q=1`.
- * For now, we implement the "randint" distribution with "quniform", which means the variable value is a value like round(uniform(lower, upper)). The type of chosen value is float. If you want to use integer value, please convert it explicitly.
-
-* {"_type":"uniform","_value":[low, high]}
+* `{"_type": "uniform", "_value": [low, high]}`
* Which means the variable value is a value uniformly between low and high.
* When optimizing, this variable is constrained to a two-sided interval.
-* {"_type":"quniform","_value":[low, high, q]}
- * Which means the variable value is a value like clip(round(uniform(low, high) / q) * q, low, high), where the clip operation is used to constraint the generated value in the bound. For example, for _value specified as [0, 10, 2.5], possible values are [0, 2.5, 5.0, 7.5, 10.0]; For _value specified as [2, 10, 5], possible values are [2, 5, 10].
-
+* `{"_type": "quniform", "_value": [low, high, q]}`
+ * Which means the variable value is a value like `clip(round(uniform(low, high) / q) * q, low, high)`, where the clip operation is used to constraint the generated value in the bound. For example, for `_value` specified as [0, 10, 2.5], possible values are [0, 2.5, 5.0, 7.5, 10.0]; For `_value` specified as [2, 10, 5], possible values are [2, 5, 10].
* Suitable for a discrete value with respect to which the objective is still somewhat "smooth", but which should be bounded both above and below. If you want to uniformly choose integer from a range [low, high], you can write `_value` like this: `[low, high, 1]`.
-* {"_type":"loguniform","_value":[low, high]}
+* `{"_type": "loguniform", "_value": [low, high]}`
* Which means the variable value is a value drawn from a range [low, high] according to a loguniform distribution like exp(uniform(log(low), log(high))), so that the logarithm of the return value is uniformly distributed.
* When optimizing, this variable is constrained to be positive.
-* {"_type":"qloguniform","_value":[low, high, q]}
- * Which means the variable value is a value like clip(round(loguniform(low, high) / q) * q, low, high), where the clip operation is used to constraint the generated value in the bound.
+* `{"_type": "qloguniform", "_value": [low, high, q]}`
+ * Which means the variable value is a value like `clip(round(loguniform(low, high) / q) * q, low, high)`, where the clip operation is used to constraint the generated value in the bound.
* Suitable for a discrete variable with respect to which the objective is "smooth" and gets smoother with the size of the value, but which should be bounded both above and below.
-* {"_type":"normal","_value":[mu, sigma]}
-
+* `{"_type": "normal", "_value": [mu, sigma]}`
* Which means the variable value is a real value that's normally-distributed with mean mu and standard deviation sigma. When optimizing, this is an unconstrained variable.
-* {"_type":"qnormal","_value":[mu, sigma, q]}
- * Which means the variable value is a value like round(normal(mu, sigma) / q) * q
+* `{"_type": "qnormal", "_value": [mu, sigma, q]}`
+ * Which means the variable value is a value like `round(normal(mu, sigma) / q) * q`
* Suitable for a discrete variable that probably takes a value around mu, but is fundamentally unbounded.
-* {"_type":"lognormal","_value":[mu, sigma]}
-
- * Which means the variable value is a value drawn according to exp(normal(mu, sigma)) so that the logarithm of the return value is normally distributed. When optimizing, this variable is constrained to be positive.
+* `{"_type": "lognormal", "_value": [mu, sigma]}`
+ * Which means the variable value is a value drawn according to `exp(normal(mu, sigma))` so that the logarithm of the return value is normally distributed. When optimizing, this variable is constrained to be positive.
-* {"_type":"qlognormal","_value":[mu, sigma, q]}
- * Which means the variable value is a value like round(exp(normal(mu, sigma)) / q) * q
+* `{"_type": "qlognormal", "_value": [mu, sigma, q]}`
+ * Which means the variable value is a value like `round(exp(normal(mu, sigma)) / q) * q`
* Suitable for a discrete variable with respect to which the objective is smooth and gets smoother with the size of the variable, which is bounded from one side.
-* {"_type":"mutable_layer","_value":{mutable_layer_infomation}}
+* `{"_type": "mutable_layer", "_value": {mutable_layer_information}}`
* Type for [Neural Architecture Search Space][1]. Value is also a dictionary, which contains key-value pairs representing respectively name and search space of each mutable_layer.
* For now, users can only use this type of search space with annotation, which means that there is no need to define a json file for search space since it will be automatically generated according to the annotation in trial code.
* For detailed usage, please refer to [General NAS Interfaces][1].
diff --git a/docs/en_US/Tutorial/WebUI.md b/docs/en_US/Tutorial/WebUI.md
index 64afd5fd7e..438f779182 100644
--- a/docs/en_US/Tutorial/WebUI.md
+++ b/docs/en_US/Tutorial/WebUI.md
@@ -11,7 +11,7 @@ Click the tab "Overview".
* If your experiment have many trials, you can change the refresh interval on here.
![](../../img/webui-img/refresh-interval.png)
-* Support to review and download the experiment result and nni-manager/dispatcher log file from the download.
+* Support to review and download the experiment result and nni-manager/dispatcher log file from the "View" button.
![](../../img/webui-img/download.png)
* You can click the learn about in the error box to track experiment log message if the experiment's status is error.
@@ -50,13 +50,11 @@ Click the tab "Intermediate Result" to see the lines graph.
![](../../img/webui-img/trials_intermeidate.png)
-We set a filter function for the intermediate result graph because that the trials may have many intermediate results in the training progress. You need to provide data if you want to use the filter button to see the trend of some trial.
+The trial may have many intermediate results during the training progress. In order to see the trend of some trials more clearly, we provide a filtering function for the intermediate result graph.
-What data should be written in the first input? Maybe you find an intermediate count those trials became better or worse. In other word, it's an important and concerned intermediate count. Just input it into the first input.
+You may find that these trials will get better or worse at one of the intermediate results. In other words, this is an important and relevant intermediate result. To take a closer look at this point, you need to enter its corresponding abscissa value at #Intermediate.
-After selecting the intermeidate count, you should input your focus metric's range on this intermediate count. Yes, it's the min and max value. Like this picture, I choose the intermeidate count is 9 and the metric's range is 60-80.
-
-As a result, I filter these trials that the metric's range is 20-60 on the 13 intermediate count.
+And then input the range of metrics on this intermediate result. Like the picture below, it chooses the No. 4 intermediate result and sets the range of metrics to 0.8-1.
![](../../img/webui-img/filter-intermediate.png)
## View trials status
diff --git a/docs/en_US/conf.py b/docs/en_US/conf.py
index c0a30c6eb0..ce306245ad 100644
--- a/docs/en_US/conf.py
+++ b/docs/en_US/conf.py
@@ -28,7 +28,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
-release = 'v0.7'
+release = 'v1.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/img/webui-img/compare.png b/docs/img/webui-img/compare.png
index 00a25524e3..256bd58837 100644
Binary files a/docs/img/webui-img/compare.png and b/docs/img/webui-img/compare.png differ
diff --git a/docs/img/webui-img/download.png b/docs/img/webui-img/download.png
index fc93949ebc..0c6c13eb92 100644
Binary files a/docs/img/webui-img/download.png and b/docs/img/webui-img/download.png differ
diff --git a/docs/img/webui-img/filter-intermediate.png b/docs/img/webui-img/filter-intermediate.png
index 69d2cb8375..91b51987ea 100644
Binary files a/docs/img/webui-img/filter-intermediate.png and b/docs/img/webui-img/filter-intermediate.png differ
diff --git a/docs/img/webui-img/over1.png b/docs/img/webui-img/over1.png
index 28bd1efbd5..e2c69720fb 100644
Binary files a/docs/img/webui-img/over1.png and b/docs/img/webui-img/over1.png differ
diff --git a/docs/img/webui-img/over2.png b/docs/img/webui-img/over2.png
index ae8fd66249..3ba7e3c6f4 100644
Binary files a/docs/img/webui-img/over2.png and b/docs/img/webui-img/over2.png differ
diff --git a/docs/img/webui-img/refresh-interval.png b/docs/img/webui-img/refresh-interval.png
index 7a65de58fa..1e5d759823 100644
Binary files a/docs/img/webui-img/refresh-interval.png and b/docs/img/webui-img/refresh-interval.png differ
diff --git a/docs/img/webui-img/trials_intermeidate.png b/docs/img/webui-img/trials_intermeidate.png
index 2f3614af34..1f980e920a 100644
Binary files a/docs/img/webui-img/trials_intermeidate.png and b/docs/img/webui-img/trials_intermeidate.png differ
diff --git a/docs/zh_CN/Tuner/SmacTuner.md b/docs/zh_CN/Tuner/SmacTuner.md
index 88b06bec37..c21a77269b 100644
--- a/docs/zh_CN/Tuner/SmacTuner.md
+++ b/docs/zh_CN/Tuner/SmacTuner.md
@@ -4,4 +4,4 @@
[SMAC](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) 基于 Sequential Model-Based Optimization (SMBO). 它利用使用过的结果好的模型(高斯随机过程模型),并将随机森林引入到 SMBO 中,来处理分类参数。 NNI 的 SMAC 通过包装 [SMAC3](https://github.com/automl/SMAC3) 来支持。
-NNI 中的 SMAC 只支持部分类型的[搜索空间](../Tutorial/SearchSpaceSpec.md),包括`choice`, `randint`, `uniform`, `loguniform`, `quniform(q=1)`。
\ No newline at end of file
+NNI 中的 SMAC 只支持部分类型的[搜索空间](../Tutorial/SearchSpaceSpec.md),包括`choice`, `randint`, `uniform`, `loguniform`, `quniform`。
\ No newline at end of file
diff --git a/src/nni_manager/training_service/common/util.ts b/src/nni_manager/training_service/common/util.ts
index aab276bd30..cfc6f9b26b 100644
--- a/src/nni_manager/training_service/common/util.ts
+++ b/src/nni_manager/training_service/common/util.ts
@@ -46,7 +46,7 @@ export async function validateCodeDir(codeDir: string) : Promise {
}
try {
fileNameValid = await validateFileNameRecursively(codeDir);
- } catch(error) {
+ } catch (error) {
throw new Error(`Validate file name error: ${error}`);
}
@@ -55,23 +55,24 @@ export async function validateCodeDir(codeDir: string) : Promise {
+ ` please check if it's a valid code dir`;
throw new Error(errMessage);
}
-
- if(!fileNameValid) {
- const errMessage: string = `File name in ${codeDir} is not valid, please check file names, only support digit number、alphabet and (.-_) in file name.`;
- throw new Error(errMessage);
+
+ if (!fileNameValid) {
+ const errMessage: string = `File name in ${codeDir} is not valid, please check file names, only support digit number、alphabet and (.-_) in file name.`;
+ throw new Error(errMessage);
}
return fileCount;
}
-
/**
* crete a new directory
* @param directory
*/
-export async function execMkdir(directory: string): Promise {
+export async function execMkdir(directory: string, share: boolean = false): Promise {
if (process.platform === 'win32') {
await cpp.exec(`powershell.exe New-Item -Path ${directory} -ItemType "directory" -Force`);
+ } else if (share) {
+ await cpp.exec(`(umask 0; mkdir -p ${directory})`);
} else {
await cpp.exec(`mkdir -p ${directory}`);
}
diff --git a/src/nni_manager/training_service/local/gpuScheduler.ts b/src/nni_manager/training_service/local/gpuScheduler.ts
index 933235a222..bf05220da0 100644
--- a/src/nni_manager/training_service/local/gpuScheduler.ts
+++ b/src/nni_manager/training_service/local/gpuScheduler.ts
@@ -54,6 +54,9 @@ class GPUScheduler {
} catch (error) {
this.log.error('Read GPU summary failed with error: ', error);
}
+ if (this.gpuSummary !== undefined && this.gpuSummary.gpuCount === 0) {
+ throw new Error('GPU not available. Please check your CUDA configuration');
+ }
await delay(5000);
}
}
@@ -97,7 +100,7 @@ class GPUScheduler {
* used to run in remote machine, and will be deleted after uploaded from local.
*/
private async runGpuMetricsCollectorScript(): Promise {
- await execMkdir(this.gpuMetricCollectorScriptFolder);
+ await execMkdir(this.gpuMetricCollectorScriptFolder, true);
//generate gpu_metrics_collector script
const gpuMetricsCollectorScriptPath: string =
path.join(this.gpuMetricCollectorScriptFolder, getScriptName('gpu_metrics_collector'));
diff --git a/src/nni_manager/training_service/local/localTrainingService.ts b/src/nni_manager/training_service/local/localTrainingService.ts
index 1fb2cb9327..88e006a3f9 100644
--- a/src/nni_manager/training_service/local/localTrainingService.ts
+++ b/src/nni_manager/training_service/local/localTrainingService.ts
@@ -131,7 +131,7 @@ class LocalTrainingService implements TrainingService {
private readonly occupiedGpuIndexNumMap: Map;
private designatedGpuIndices!: Set;
private readonly log: Logger;
- private localTrailConfig?: TrialConfig;
+ private localTrialConfig?: TrialConfig;
private localConfig?: LocalConfig;
private isMultiPhase: boolean;
private readonly jobStreamMap: Map;
@@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService {
} catch (error) {
//ignore
}
- this.log.debug(`trailJob status update: ${trialJobId}, ${trialJob.status}`);
+ this.log.debug(`trialJob status update: ${trialJobId}, ${trialJob.status}`);
}
}
@@ -302,14 +302,14 @@ class LocalTrainingService implements TrainingService {
}
switch (key) {
case TrialConfigMetadataKey.TRIAL_CONFIG:
- this.localTrailConfig = JSON.parse(value);
+ this.localTrialConfig = JSON.parse(value);
// Parse trial config failed, throw Error
- if (this.localTrailConfig === undefined) {
+ if (this.localTrialConfig === undefined) {
throw new Error('trial config parsed failed');
}
- if (this.localTrailConfig.gpuNum !== undefined) {
- this.log.info(`required GPU number is ${this.localTrailConfig.gpuNum}`);
- if (this.gpuScheduler === undefined && this.localTrailConfig.gpuNum > 0) {
+ if (this.localTrialConfig.gpuNum !== undefined) {
+ this.log.info(`required GPU number is ${this.localTrialConfig.gpuNum}`);
+ if (this.gpuScheduler === undefined && this.localTrialConfig.gpuNum > 0) {
this.gpuScheduler = new GPUScheduler();
}
}
@@ -343,10 +343,10 @@ class LocalTrainingService implements TrainingService {
switch (key) {
case TrialConfigMetadataKey.TRIAL_CONFIG:
let getResult: Promise;
- if (this.localTrailConfig === undefined) {
+ if (this.localTrialConfig === undefined) {
getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`));
} else {
- getResult = Promise.resolve(JSON.stringify(this.localTrailConfig));
+ getResult = Promise.resolve(JSON.stringify(this.localTrialConfig));
}
return getResult;
@@ -359,8 +359,8 @@ class LocalTrainingService implements TrainingService {
this.log.info('Stopping local machine training service...');
this.stopping = true;
for (const stream of this.jobStreamMap.values()) {
- stream.end(0)
- stream.emit('end')
+ stream.end(0);
+ stream.emit('end');
}
if (this.gpuScheduler !== undefined) {
await this.gpuScheduler.stop();
@@ -378,8 +378,8 @@ class LocalTrainingService implements TrainingService {
throw new Error(`Could not find stream in trial ${trialJob.id}`);
}
//Refer https://github.com/Juul/tail-stream/issues/20
- stream.end(0)
- stream.emit('end')
+ stream.end(0);
+ stream.emit('end');
this.jobStreamMap.delete(trialJob.id);
}
}
@@ -427,8 +427,8 @@ class LocalTrainingService implements TrainingService {
}
private tryGetAvailableResource(): [boolean, { gpuIndices: number[]}] {
- if (this.localTrailConfig === undefined) {
- throw new Error('localTrailConfig is not initialized!');
+ if (this.localTrialConfig === undefined) {
+ throw new Error('localTrialConfig is not initialized!');
}
const resource: { gpuIndices: number[] } = { gpuIndices: [] };
@@ -450,11 +450,11 @@ class LocalTrainingService implements TrainingService {
selectedGPUIndices = selectedGPUIndices.filter((index: number) => this.designatedGpuIndices.has(index));
}
- if (selectedGPUIndices.length < this.localTrailConfig.gpuNum) {
+ if (selectedGPUIndices.length < this.localTrialConfig.gpuNum) {
return [false, resource];
}
- selectedGPUIndices.splice(this.localTrailConfig.gpuNum);
+ selectedGPUIndices.splice(this.localTrialConfig.gpuNum);
Object.assign(resource, { gpuIndices: selectedGPUIndices });
return [true, resource];
@@ -494,7 +494,7 @@ class LocalTrainingService implements TrainingService {
if (!success) {
break;
}
-
+
this.occupyResource(resource);
await this.runTrialJob(trialJobId, resource);
}
@@ -512,18 +512,23 @@ class LocalTrainingService implements TrainingService {
}
}
- private getScript(localTrailConfig: TrialConfig, workingDirectory: string): string[] {
+ private getScript(localTrialConfig: TrialConfig, workingDirectory: string): string[] {
const script: string[] = [];
if (process.platform === 'win32') {
script.push(
- `cmd /c ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
+ `cmd /c ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
`$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
`$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`,
`Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`);
} else {
- script.push(
- `eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
- `echo $? \`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`);
+ script.push(`eval ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`);
+ if (process.platform === 'darwin') {
+ // https://superuser.com/questions/599072/how-to-get-bash-execution-time-in-milliseconds-under-mac-os-x
+ // Considering the worst case, write 999 to avoid negative duration
+ script.push(`echo $? \`date +%s999\` >${path.join(workingDirectory, '.nni', 'state')}`);
+ } else {
+ script.push(`echo $? \`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`);
+ }
}
return script;
@@ -531,23 +536,23 @@ class LocalTrainingService implements TrainingService {
private async runTrialJob(trialJobId: string, resource: {gpuIndices: number[]}): Promise {
const trialJobDetail: LocalTrialJobDetail = this.jobMap.get(trialJobId);
- if (this.localTrailConfig === undefined) {
+ if (this.localTrialConfig === undefined) {
throw new Error(`localTrialConfig not initialized!`);
}
- const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource, this.localTrailConfig.gpuNum);
+ const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource, this.localTrialConfig.gpuNum);
- if (this.localTrailConfig === undefined) {
+ if (this.localTrialConfig === undefined) {
throw new Error('trial config is not initialized');
}
const runScriptContent: string[] = [];
if (process.platform !== 'win32') {
runScriptContent.push('#!/bin/bash');
}
- runScriptContent.push(`cd ${this.localTrailConfig.codeDir}`);
+ runScriptContent.push(`cd ${this.localTrialConfig.codeDir}`);
for (const variable of variables) {
runScriptContent.push(setEnvironmentVariable(variable));
}
- const scripts: string[] = this.getScript(this.localTrailConfig, trialJobDetail.workingDirectory);
+ const scripts: string[] = this.getScript(this.localTrialConfig, trialJobDetail.workingDirectory);
scripts.forEach((script: string) => {
runScriptContent.push(script);
});
diff --git a/src/nni_manager/training_service/pai/hdfsClientUtility.ts b/src/nni_manager/training_service/pai/hdfsClientUtility.ts
index f7603afb0c..7c140f8b2f 100644
--- a/src/nni_manager/training_service/pai/hdfsClientUtility.ts
+++ b/src/nni_manager/training_service/pai/hdfsClientUtility.ts
@@ -32,7 +32,7 @@ export namespace HDFSClientUtility {
* Get NNI experiment root directory
* @param hdfsUserName HDFS user name
*/
- function hdfsExpRootDir(hdfsUserName: string): string {
+ export function hdfsExpRootDir(hdfsUserName: string): string {
// tslint:disable-next-line:prefer-template
return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());
}
diff --git a/src/nni_manager/training_service/pai/paiTrainingService.ts b/src/nni_manager/training_service/pai/paiTrainingService.ts
index 09e2a42675..91865d906f 100644
--- a/src/nni_manager/training_service/pai/paiTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiTrainingService.ts
@@ -74,9 +74,11 @@ class PAITrainingService implements TrainingService {
private paiRestServerPort?: number;
private nniManagerIpConfig?: NNIManagerIpConfig;
private copyExpCodeDirPromise?: Promise;
+ private copyAuthFilePromise?: Promise;
private versionCheck: boolean = true;
private logCollection: string;
private isMultiPhase: boolean = false;
+ private authFileHdfsPath: string | undefined = undefined;
constructor() {
this.log = getLogger();
@@ -292,6 +294,12 @@ class PAITrainingService implements TrainingService {
HDFSClientUtility.getHdfsExpCodeDir(this.paiClusterConfig.userName),
this.hdfsClient
);
+
+ // Upload authFile to hdfs
+ if (this.paiTrialConfig.authFile) {
+ this.authFileHdfsPath = unixPathJoin(HDFSClientUtility.hdfsExpRootDir(this.paiClusterConfig.userName), 'authFile');
+ this.copyAuthFilePromise = HDFSClientUtility.copyFileToHdfs(this.paiTrialConfig.authFile, this.authFileHdfsPath, this.hdfsClient);
+ }
deferred.resolve();
break;
@@ -373,6 +381,10 @@ class PAITrainingService implements TrainingService {
await this.copyExpCodeDirPromise;
}
+ //Make sure authFile is copied from local to HDFS
+ if (this.paiTrialConfig.authFile) {
+ await this.copyAuthFilePromise;
+ }
// Step 1. Prepare PAI job configuration
const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
@@ -449,7 +461,7 @@ class PAITrainingService implements TrainingService {
// Add Virutal Cluster
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(),
//Task auth File
- this.paiTrialConfig.authFile
+ this.authFileHdfsPath
);
// Step 2. Upload code files in codeDir onto HDFS
diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
index c55c28427b..35631f1ce9 100644
--- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
+++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
@@ -511,12 +511,16 @@ class RemoteMachineTrainingService implements TrainingService {
// tslint:disable-next-line: no-floating-promises
SSHClientUtility.remoteExeCommand(`bash ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics_collector.sh')}`, conn);
- this.timer.subscribe(
+ const disposable: Rx.IDisposable = this.timer.subscribe(
async (tick: number) => {
const cmdresult: RemoteCommandResult = await SSHClientUtility.remoteExeCommand(
`tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn);
if (cmdresult !== undefined && cmdresult.stdout !== undefined) {
rmMeta.gpuSummary = JSON.parse(cmdresult.stdout);
+ if (rmMeta.gpuSummary.gpuCount === 0) {
+ this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`);
+ this.timer.unsubscribe(disposable);
+ }
}
}
);
diff --git a/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py b/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py
index cb8c102a83..124f348a99 100644
--- a/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py
+++ b/src/sdk/pynni/nni/bohb_advisor/bohb_advisor.py
@@ -31,7 +31,7 @@
from nni.protocol import CommandType, send
from nni.msg_dispatcher_base import MsgDispatcherBase
-from nni.utils import OptimizeMode, MetricType, extract_scalar_reward, randint_to_quniform
+from nni.utils import OptimizeMode, MetricType, extract_scalar_reward
from nni.common import multi_phase_enabled
from .config_generator import CG_BOHB
@@ -467,7 +467,6 @@ def handle_update_search_space(self, data):
search space of this experiment
"""
search_space = data
- randint_to_quniform(search_space)
cs = CS.ConfigurationSpace()
for var in search_space:
_type = str(search_space[var]["_type"])
@@ -476,7 +475,7 @@ def handle_update_search_space(self, data):
var, choices=search_space[var]["_value"]))
elif _type == 'randint':
cs.add_hyperparameter(CSH.UniformIntegerHyperparameter(
- var, lower=0, upper=search_space[var]["_value"][0]))
+ var, lower=search_space[var]["_value"][0], upper=search_space[var]["_value"][1] - 1))
elif _type == 'uniform':
cs.add_hyperparameter(CSH.UniformFloatHyperparameter(
var, lower=search_space[var]["_value"][0], upper=search_space[var]["_value"][1]))
diff --git a/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py b/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py
index 638a2be30a..edad05663b 100644
--- a/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py
+++ b/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py
@@ -26,7 +26,7 @@
import numpy as np
from nni.tuner import Tuner
-from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index, randint_to_quniform
+from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index
import nni.parameter_expressions as parameter_expressions
@@ -175,7 +175,6 @@ def update_search_space(self, search_space):
search_space : dict
"""
self.searchspace_json = search_space
- randint_to_quniform(self.searchspace_json)
self.space = json2space(self.searchspace_json)
self.random_state = np.random.RandomState()
diff --git a/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py b/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py
index a2be0a6e12..f596e5ea3b 100644
--- a/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py
+++ b/src/sdk/pynni/nni/hyperband_advisor/hyperband_advisor.py
@@ -31,7 +31,7 @@
from nni.protocol import CommandType, send
from nni.msg_dispatcher_base import MsgDispatcherBase
from nni.common import init_logger, multi_phase_enabled
-from nni.utils import NodeType, OptimizeMode, MetricType, extract_scalar_reward, randint_to_quniform
+from nni.utils import NodeType, OptimizeMode, MetricType, extract_scalar_reward
import nni.parameter_expressions as parameter_expressions
_logger = logging.getLogger(__name__)
@@ -358,7 +358,6 @@ def handle_update_search_space(self, data):
number of trial jobs
"""
self.searchspace_json = data
- randint_to_quniform(self.searchspace_json)
self.random_state = np.random.RandomState()
def _handle_trial_end(self, parameter_id):
diff --git a/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py b/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
index f66425d869..7d1e6f7caa 100644
--- a/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
+++ b/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
@@ -27,7 +27,7 @@
import hyperopt as hp
import numpy as np
from nni.tuner import Tuner
-from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index, randint_to_quniform
+from nni.utils import NodeType, OptimizeMode, extract_scalar_reward, split_index
logger = logging.getLogger('hyperopt_AutoML')
@@ -51,6 +51,8 @@ def json2space(in_x, name=NodeType.ROOT):
_value = json2space(in_x[NodeType.VALUE], name=name)
if _type == 'choice':
out_y = eval('hp.hp.choice')(name, _value)
+ elif _type == 'randint':
+ out_y = hp.hp.randint(name, _value[1] - _value[0])
else:
if _type in ['loguniform', 'qloguniform']:
_value[:2] = np.log(_value[:2])
@@ -93,6 +95,8 @@ def json2parameter(in_x, parameter, name=NodeType.ROOT):
else:
if _type in ['quniform', 'qloguniform']:
out_y = np.clip(parameter[name], in_x[NodeType.VALUE][0], in_x[NodeType.VALUE][1])
+ elif _type == 'randint':
+ out_y = parameter[name] + in_x[NodeType.VALUE][0]
else:
out_y = parameter[name]
else:
@@ -247,7 +251,6 @@ def update_search_space(self, search_space):
search_space : dict
"""
self.json = search_space
- randint_to_quniform(self.json)
search_space_instance = json2space(self.json)
rstate = np.random.RandomState()
@@ -279,7 +282,7 @@ def generate_parameters(self, parameter_id, **kwargs):
total_params = self.get_suggestion(random_search=False)
# avoid generating same parameter with concurrent trials because hyperopt doesn't support parallel mode
if total_params in self.total_data.values():
- # but it can cause deplicate parameter rarely
+ # but it can cause duplicate parameter rarely
total_params = self.get_suggestion(random_search=True)
self.total_data[parameter_id] = total_params
@@ -315,6 +318,10 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
rval = self.CL_rval
else:
rval = self.rval
+ # ignore duplicated reported final result (due to awareness of intermediate results)
+ if parameter_id not in self.running_data:
+ logger.info("Received duplicated final result with parameter id: %s", parameter_id)
+ return
self.running_data.remove(parameter_id)
# update the reward of optimal_y
diff --git a/src/sdk/pynni/nni/hyperopt_tuner/test_hyperopt_tuner.py b/src/sdk/pynni/nni/hyperopt_tuner/test_hyperopt_tuner.py
index 61ad422f5e..3837c6b785 100644
--- a/src/sdk/pynni/nni/hyperopt_tuner/test_hyperopt_tuner.py
+++ b/src/sdk/pynni/nni/hyperopt_tuner/test_hyperopt_tuner.py
@@ -25,7 +25,7 @@
import hyperopt as hp
-from nni.hyperopt_tuner.hyperopt_tuner import json2space, json2parameter, json2vals
+from nni.hyperopt_tuner.hyperopt_tuner import json2space, json2parameter, json2vals, HyperoptTuner
class HyperoptTunerTestCase(TestCase):
@@ -99,6 +99,29 @@ def test_json2vals(self):
self.assertEqual(out_y["root[optimizer]-choice"], 0)
self.assertEqual(out_y["root[learning_rate]-choice"], 1)
+ def test_tuner_generate(self):
+ for algorithm in ["tpe", "random_search", "anneal"]:
+ tuner = HyperoptTuner(algorithm)
+ choice_list = ["a", "b", 1, 2]
+ tuner.update_search_space({
+ "a": {
+ "_type": "randint",
+ "_value": [1, 3]
+ },
+ "b": {
+ "_type": "choice",
+ "_value": choice_list
+ }
+ })
+ for k in range(30):
+ # sample multiple times
+ param = tuner.generate_parameters(k)
+ print(param)
+ self.assertIsInstance(param["a"], int)
+ self.assertGreaterEqual(param["a"], 1)
+ self.assertLessEqual(param["a"], 2)
+ self.assertIn(param["b"], choice_list)
+
if __name__ == '__main__':
main()
diff --git a/src/sdk/pynni/nni/metis_tuner/lib_data.py b/src/sdk/pynni/nni/metis_tuner/lib_data.py
index d24aeed678..c42f3ee735 100644
--- a/src/sdk/pynni/nni/metis_tuner/lib_data.py
+++ b/src/sdk/pynni/nni/metis_tuner/lib_data.py
@@ -55,7 +55,7 @@ def rand(x_bounds, x_types):
temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
outputs.append(temp)
elif x_types[i] == "range_int":
- temp = random.randint(x_bounds[i][0], x_bounds[i][1])
+ temp = random.randint(x_bounds[i][0], x_bounds[i][1] -1)
outputs.append(temp)
elif x_types[i] == "range_continuous":
temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
diff --git a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
index a796ab6163..9cfc9710f9 100644
--- a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
+++ b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
@@ -121,13 +121,12 @@ def update_search_space(self, search_space):
key_range = search_space[key]['_value']
idx = self.key_order.index(key)
if key_type == 'quniform':
- if key_range[2] == 1:
- self.x_bounds[idx] = [key_range[0], key_range[1]]
+ if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer():
+ self.x_bounds[idx] = [key_range[0], key_range[1]+1]
self.x_types[idx] = 'range_int'
else:
- bounds = []
- for value in np.arange(key_range[0], key_range[1], key_range[2]):
- bounds.append(value)
+ low, high, q = key_range
+ bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
self.x_bounds[idx] = bounds
self.x_types[idx] = 'discrete_int'
elif key_type == 'randint':
diff --git a/src/sdk/pynni/nni/parameter_expressions.py b/src/sdk/pynni/nni/parameter_expressions.py
index 8aef0e9062..838f1f2484 100644
--- a/src/sdk/pynni/nni/parameter_expressions.py
+++ b/src/sdk/pynni/nni/parameter_expressions.py
@@ -32,12 +32,14 @@ def choice(options, random_state):
return random_state.choice(options)
-def randint(upper, random_state):
+def randint(lower, upper, random_state):
'''
+ Generate a random integer from `lower` (inclusive) to `upper` (exclusive).
+ lower: an int that represents a lower bound
upper: an int that represent an upper bound
random_state: an object of numpy.random.RandomState
'''
- return random_state.randint(upper)
+ return random_state.randint(lower, upper)
def uniform(low, high, random_state):
diff --git a/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py b/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
index a46e128e1d..413c4af7b6 100644
--- a/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
+++ b/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
@@ -88,10 +88,10 @@ def generate_pcs(nni_search_space_content):
raise RuntimeError('%s has already existed, please make sure search space has no duplicate key.' % key)
categorical_dict[key] = search_space[key]['_value']
elif search_space[key]['_type'] == 'randint':
- # TODO: support lower bound in randint
- pcs_fd.write('%s integer [0, %d] [%d]\n' % (
+ pcs_fd.write('%s integer [%d, %d] [%d]\n' % (
key,
search_space[key]['_value'][0],
+ search_space[key]['_value'][1] - 1,
search_space[key]['_value'][0]))
elif search_space[key]['_type'] == 'uniform':
pcs_fd.write('%s real %s [%s]\n' % (
@@ -105,13 +105,13 @@ def generate_pcs(nni_search_space_content):
key,
json.dumps(search_space[key]['_value']),
json.dumps(search_space[key]['_value'][0])))
- elif search_space[key]['_type'] == 'quniform' \
- and search_space[key]['_value'][2] == 1:
- pcs_fd.write('%s integer [%d, %d] [%d]\n' % (
+ elif search_space[key]['_type'] == 'quniform':
+ low, high, q = search_space[key]['_value'][0:3]
+ vals = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high).tolist()
+ pcs_fd.write('%s ordinal {%s} [%s]\n' % (
key,
- search_space[key]['_value'][0],
- search_space[key]['_value'][1],
- search_space[key]['_value'][0]))
+ json.dumps(vals)[1:-1],
+ json.dumps(vals[0])))
else:
raise RuntimeError('unsupported _type %s' % search_space[key]['_type'])
except:
diff --git a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
index 2eaf27b2bc..28ddf1a384 100644
--- a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
+++ b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
@@ -38,7 +38,7 @@
from .convert_ss_to_scenario import generate_scenario
from nni.tuner import Tuner
-from nni.utils import OptimizeMode, extract_scalar_reward, randint_to_quniform
+from nni.utils import OptimizeMode, extract_scalar_reward
class SMACTuner(Tuner):
@@ -139,7 +139,6 @@ def update_search_space(self, search_space):
search_space:
search space
"""
- randint_to_quniform(search_space)
if not self.update_ss_done:
self.categorical_dict = generate_scenario(search_space)
if self.categorical_dict is None:
diff --git a/src/sdk/pynni/nni/smartparam.py b/src/sdk/pynni/nni/smartparam.py
index 2c75cf07cd..4c90a8703f 100644
--- a/src/sdk/pynni/nni/smartparam.py
+++ b/src/sdk/pynni/nni/smartparam.py
@@ -19,11 +19,11 @@
# ==================================================================================================
-import random
import numpy as np
from .env_vars import trial_env_vars
from . import trial
+from . import parameter_expressions as param_exp
from .nas_utils import classic_mode, enas_mode, oneshot_mode, darts_mode
@@ -47,39 +47,39 @@
if trial_env_vars.NNI_PLATFORM is None:
def choice(*options, name=None):
- return random.choice(options)
+ return param_exp.choice(options, np.random.RandomState())
- def randint(upper, name=None):
- return random.randrange(upper)
+ def randint(lower, upper, name=None):
+ return param_exp.randint(lower, upper, np.random.RandomState())
def uniform(low, high, name=None):
- return random.uniform(low, high)
+ return param_exp.uniform(low, high, np.random.RandomState())
def quniform(low, high, q, name=None):
assert high > low, 'Upper bound must be larger than lower bound'
- return np.clip(round(random.uniform(low, high) / q) * q, low, high)
+ return param_exp.quniform(low, high, q, np.random.RandomState())
def loguniform(low, high, name=None):
assert low > 0, 'Lower bound must be positive'
- return np.exp(random.uniform(np.log(low), np.log(high)))
+ return param_exp.loguniform(low, high, np.random.RandomState())
def qloguniform(low, high, q, name=None):
- return np.clip(round(loguniform(low, high) / q) * q, low, high)
+ return param_exp.qloguniform(low, high, q, np.random.RandomState())
def normal(mu, sigma, name=None):
- return random.gauss(mu, sigma)
+ return param_exp.normal(mu, sigma, np.random.RandomState())
def qnormal(mu, sigma, q, name=None):
- return round(random.gauss(mu, sigma) / q) * q
+ return param_exp.qnormal(mu, sigma, q, np.random.RandomState())
def lognormal(mu, sigma, name=None):
- return np.exp(random.gauss(mu, sigma))
+ return param_exp.lognormal(mu, sigma, np.random.RandomState())
def qlognormal(mu, sigma, q, name=None):
- return round(lognormal(mu, sigma) / q) * q
+ return param_exp.qlognormal(mu, sigma, q, np.random.RandomState())
def function_choice(*funcs, name=None):
- return random.choice(funcs)()
+ return param_exp.choice(funcs, np.random.RandomState())()
def mutable_layer():
raise RuntimeError('Cannot call nni.mutable_layer in this mode')
@@ -89,7 +89,7 @@ def mutable_layer():
def choice(options, name=None, key=None):
return options[_get_param(key)]
- def randint(upper, name=None, key=None):
+ def randint(lower, upper, name=None, key=None):
return _get_param(key)
def uniform(low, high, name=None, key=None):
diff --git a/src/sdk/pynni/nni/utils.py b/src/sdk/pynni/nni/utils.py
index 164d7edcce..ee80a09967 100644
--- a/src/sdk/pynni/nni/utils.py
+++ b/src/sdk/pynni/nni/utils.py
@@ -111,23 +111,3 @@ def init_dispatcher_logger():
if dispatcher_env_vars.NNI_LOG_DIRECTORY is not None:
logger_file_path = os.path.join(dispatcher_env_vars.NNI_LOG_DIRECTORY, logger_file_path)
init_logger(logger_file_path, dispatcher_env_vars.NNI_LOG_LEVEL)
-
-
-def randint_to_quniform(in_x):
- if isinstance(in_x, dict):
- if NodeType.TYPE in in_x.keys():
- if in_x[NodeType.TYPE] == 'randint':
- value = in_x[NodeType.VALUE]
- value.append(1)
-
- in_x[NodeType.TYPE] = 'quniform'
- in_x[NodeType.VALUE] = value
-
- elif in_x[NodeType.TYPE] == 'choice':
- randint_to_quniform(in_x[NodeType.VALUE])
- else:
- for key in in_x.keys():
- randint_to_quniform(in_x[key])
- elif isinstance(in_x, list):
- for temp in in_x:
- randint_to_quniform(temp)
diff --git a/src/webui/src/App.tsx b/src/webui/src/App.tsx
index 9839542853..c3b31d422a 100644
--- a/src/webui/src/App.tsx
+++ b/src/webui/src/App.tsx
@@ -1,11 +1,15 @@
import * as React from 'react';
import { Row, Col } from 'antd';
+import axios from 'axios';
+import { COLUMN, MANAGER_IP } from './static/const';
import './App.css';
import SlideBar from './components/SlideBar';
interface AppState {
interval: number;
whichPageToFresh: string;
+ columnList: Array;
+ concurrency: number;
}
class App extends React.Component<{}, AppState> {
@@ -14,7 +18,9 @@ class App extends React.Component<{}, AppState> {
super(props);
this.state = {
interval: 10, // sendons
- whichPageToFresh: ''
+ whichPageToFresh: '',
+ columnList: COLUMN,
+ concurrency: 1
};
}
@@ -31,25 +37,57 @@ class App extends React.Component<{}, AppState> {
}
}
+ changeColumn = (columnList: Array) => {
+ if (this._isMounted === true) {
+ this.setState(() => ({ columnList: columnList }));
+ }
+ }
+
+ changeConcurrency = (val: number) => {
+ if (this._isMounted === true) {
+ this.setState(() => ({ concurrency: val }));
+ }
+ }
+
+ getConcurrency = () => {
+ axios(`${MANAGER_IP}/experiment`, {
+ method: 'GET'
+ })
+ .then(res => {
+ if (res.status === 200) {
+ const params = res.data.params;
+ if (this._isMounted) {
+ this.setState(() => ({ concurrency: params.trialConcurrency }));
+ }
+ }
+ });
+ }
+
componentDidMount() {
this._isMounted = true;
+ this.getConcurrency();
}
componentWillUnmount() {
this._isMounted = false;
}
render() {
- const { interval, whichPageToFresh } = this.state;
+ const { interval, whichPageToFresh, columnList, concurrency } = this.state;
const reactPropsChildren = React.Children.map(this.props.children, child =>
- // tslint:disable-next-line:no-any
- React.cloneElement(child as React.ReactElement, { interval, whichPageToFresh })
+ React.cloneElement(
+ // tslint:disable-next-line:no-any
+ child as React.ReactElement, {
+ interval, whichPageToFresh,
+ columnList, changeColumn: this.changeColumn,
+ concurrency, changeConcurrency: this.changeConcurrency
+ })
);
return (