Skip to content

Commit

Permalink
Merge pull request microsoft#4 from Microsoft/master
Browse files Browse the repository at this point in the history
pull latest code
  • Loading branch information
chicm-ms authored Nov 27, 2018
2 parents 75fd2f1 + 101b02f commit 10e998f
Show file tree
Hide file tree
Showing 73 changed files with 1,836 additions and 461 deletions.
36 changes: 26 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,45 @@
PIP_INSTALL := python3 -m pip install
PIP_UNINSTALL := python3 -m pip uninstall

## Colorful output
_INFO := $(shell echo -e '\e[1;36m')
_WARNING := $(shell echo -e '\e[1;33m')
_END := $(shell echo -e '\e[0m')
# detect OS
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S), Linux)
OS_SPEC := linux
## Colorful output
_INFO := $(shell echo -e '\e[1;36m')
_WARNING := $(shell echo -e '\e[1;33m')
_END := $(shell echo -e '\e[0m')
else ifeq ($(UNAME_S), Darwin)
OS_SPEC := darwin
else
$(error platform $(UNAME_S) not supported)
endif



## Install directories
ifeq ($(shell id -u), 0) # is root
_ROOT := 1
ROOT_FOLDER ?= $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getsitepackages()[0]).parents[2])')
BASH_COMP_SCRIPT ?= /usr/share/bash-completion/completions/nnictl
BASH_COMP_PREFIX ?= /usr/share/bash-completion/completions
else # is normal user
ROOT_FOLDER ?= $(shell python3 -c 'import site; from pathlib import Path; print(Path(site.getusersitepackages()).parents[2])')
ifndef VIRTUAL_ENV
PIP_MODE ?= --user
endif
BASH_COMP_SCRIPT ?= ${HOME}/.bash_completion.d/nnictl
BASH_COMP_PREFIX ?= ${HOME}/.bash_completion.d
endif
BASH_COMP_SCRIPT := $(BASH_COMP_PREFIX)/nnictl

NNI_INSTALL_PATH ?= $(INSTALL_PREFIX)/nni
NNI_TMP_PATH ?= /tmp

BIN_FOLDER ?= $(ROOT_FOLDER)/bin
NNI_PKG_FOLDER ?= $(ROOT_FOLDER)/nni

## Dependency information
NNI_NODE_TARBALL ?= /tmp/nni-node-linux-x64.tar.xz
NNI_NODE_FOLDER = /tmp/nni-node-linux-x64
NNI_NODE_TARBALL ?= /tmp/nni-node-$(OS_SPEC)-x64.tar.xz
NNI_NODE_FOLDER = /tmp/nni-node-$(OS_SPEC)-x64
NNI_NODE ?= $(BIN_FOLDER)/node
NNI_YARN_TARBALL ?= /tmp/nni-yarn.tar.gz
NNI_YARN_FOLDER ?= /tmp/nni-yarn
Expand Down Expand Up @@ -120,7 +135,7 @@ clean:

$(NNI_NODE_TARBALL):
#$(_INFO) Downloading Node.js $(_END)
wget https://aka.ms/nodejs-download -O $(NNI_NODE_TARBALL)
wget https://aka.ms/nni/nodejs-download/$(OS_SPEC) -O $(NNI_NODE_TARBALL)

$(NNI_YARN_TARBALL):
#$(_INFO) Downloading Yarn $(_END)
Expand Down Expand Up @@ -176,7 +191,8 @@ dev-install-node-modules:

.PHONY: install-scripts
install-scripts:
install -Dm644 tools/bash-completion $(BASH_COMP_SCRIPT)
mkdir -p $(BASH_COMP_PREFIX)
install -m644 tools/bash-completion $(BASH_COMP_SCRIPT)

.PHONY: update-bash-config
ifndef _ROOT
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
[![Issues](https://img.shields.io/github/issues-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen)
[![Bugs](https://img.shields.io/github/issues/Microsoft/nni/bug.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
[![Pull Requests](https://img.shields.io/github/issues-pr-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/pulls?q=is%3Apr+is%3Aopen)
[![Version](https://img.shields.io/github/release/Microsoft/nni.svg)](https://github.com/Microsoft/nni/releases)
[![Version](https://img.shields.io/github/release/Microsoft/nni.svg)](https://github.com/Microsoft/nni/releases) [![Join the chat at https://gitter.im/Microsoft/nni](https://badges.gitter.im/Microsoft/nni.svg)](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

NNI (Neural Network Intelligence) is a toolkit to help users run automated machine learning (AutoML) experiments.
The tool dispatches and runs trial jobs generated by tuning algorithms to search for the best neural architecture and/or hyper-parameters in different environments such as local machines, remote servers and the cloud.
Expand Down
25 changes: 1 addition & 24 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -1,28 +1,5 @@
jobs:

- job: 'Install_through_pip'
pool:
vmImage: 'Ubuntu 16.04'
strategy:
matrix:
Python36:
PYTHON_VERSION: '3.6'

steps:
- script: python3 -m pip install --upgrade pip setuptools
displayName: 'Install python tools'
- script: |
python3 -m pip install nni --user
displayName: 'Install nni toolkit via pip'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 naive_test.py
displayName: 'Integration tests'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 sdk_tuner_test.py
displayName: 'Built-in tuner tests'
- job: 'Install_through_source_code'
pool:
vmImage: 'Ubuntu 16.04'
Expand All @@ -43,5 +20,5 @@ jobs:
displayName: 'Integration tests'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 sdk_tuner_test.py
PATH=$HOME/.local/bin:$PATH python3 sdk_test.py
displayName: 'Built-in tuner tests'
2 changes: 1 addition & 1 deletion deployment/pypi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ make
```bash
make upload
```
You may need to input the account and password of htts://pypi.org during this process.
You may need to input the account and password of https://pypi.org during this process.
4 changes: 2 additions & 2 deletions docs/ExperimentConfig.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,10 @@ machineList:
* __builtinTunerName__ and __classArgs__
* __builtinTunerName__

__builtinTunerName__ specifies the name of system tuner you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
__builtinTunerName__ specifies the name of the system tuner you want to use. The nni sdk provides six kinds of tuners: {__TPE__, __Random__, __Anneal__, __Evolution__, __BatchTuner__, __GridSearch__}
* __classArgs__

__classArgs__ specifies the arguments of tuner algorithm
__classArgs__ specifies the arguments of tuner algorithm. If the __builtinTunerName__ is in {__TPE__, __Random__, __Anneal__, __Evolution__}, you should set __optimize_mode__.
* __codeDir__, __classFileName__, __className__ and __classArgs__
* __codeDir__

Expand Down
2 changes: 1 addition & 1 deletion src/nni_manager/common/datastore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ interface TrialJobInfo {
endTime?: number;
hyperParameters?: string[];
logPath?: string;
finalMetricData?: MetricDataRecord;
finalMetricData?: MetricDataRecord[];
stderrPath?: string;
}

Expand Down
3 changes: 2 additions & 1 deletion src/nni_manager/common/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ interface ExperimentParams {
searchSpace: string;
trainingServicePlatform: string;
multiPhase?: boolean;
multiThread?: boolean;
tuner: {
className: string;
builtinTunerName?: string;
Expand Down Expand Up @@ -75,7 +76,7 @@ interface TrialJobStatistics {
}

interface NNIManagerStatus {
status: 'INITIALIZED' | 'EXPERIMENT_RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE';
status: 'INITIALIZED' | 'EXPERIMENT_RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL';
errors: string[];
}

Expand Down
18 changes: 15 additions & 3 deletions src/nni_manager/common/trainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
/**
* define TrialJobStatus
*/
type TrialJobStatus = 'UNKNOWN' | 'WAITING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED' | 'USER_CANCELED' | 'SYS_CANCELED';
type TrialJobStatus = 'UNKNOWN' | 'WAITING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED' | 'USER_CANCELED' | 'SYS_CANCELED' | 'EARLY_STOPPED';
type JobType = 'TRIAL' | 'HOST';

interface TrainingServiceMetadata {
Expand Down Expand Up @@ -113,15 +113,27 @@ abstract class TrainingService {
public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>;
public abstract updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail>;
public abstract get isMultiPhaseJobSupported(): boolean;
public abstract cancelTrialJob(trialJobId: string): Promise<void>;
public abstract cancelTrialJob(trialJobId: string, isEarlyStopped?: boolean): Promise<void>;
public abstract setClusterMetadata(key: string, value: string): Promise<void>;
public abstract getClusterMetadata(key: string): Promise<string>;
public abstract cleanUp(): Promise<void>;
public abstract run(): Promise<void>;
}

/**
 * Immutable holder for the IP address of the NNI manager.
 */
class NNIManagerIpConfig {
    /**
     * @param nniManagerIp IP address of the NNI manager; stored as a
     *                     public read-only property of the same name
     */
    constructor(public readonly nniManagerIp: string) {}
}

export {
TrainingService, TrainingServiceError, TrialJobStatus, TrialJobApplicationForm,
TrainingServiceMetadata, TrialJobDetail, TrialJobMetric, HyperParameters,
HostJobApplicationForm, JobApplicationForm, JobType
HostJobApplicationForm, JobApplicationForm, JobType, NNIManagerIpConfig
};


25 changes: 22 additions & 3 deletions src/nni_manager/common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import * as util from 'util';
import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, getExperimentId, setExperimentStartupInfo } from './experimentStartupInfo';
import { Manager } from './manager';
import { HyperParameters, TrainingService } from './trainingService';
import { HyperParameters, TrainingService, TrialJobStatus } from './trainingService';

function getExperimentRootDir(): string {
return path.join(os.homedir(), 'nni', 'experiments', getExperimentId());
Expand Down Expand Up @@ -158,12 +158,16 @@ function parseArg(names: string[]): string {
* @param assessor: similar to tuner
*
*/
function getMsgDispatcherCommand(tuner: any, assessor: any, multiPhase: boolean = false): string {
function getMsgDispatcherCommand(tuner: any, assessor: any, multiPhase: boolean = false, multiThread: boolean = false): string {
let command: string = `python3 -m nni --tuner_class_name ${tuner.className}`;
if (multiPhase) {
command += ' --multi_phase';
}

if (multiThread) {
command += ' --multi_thread';
}

if (tuner.classArgs !== undefined) {
command += ` --tuner_args ${JSON.stringify(JSON.stringify(tuner.classArgs))}`;
}
Expand Down Expand Up @@ -268,5 +272,20 @@ function getIPV4Address(): string {
throw Error('getIPV4Address() failed because no valid IPv4 address found.')
}

export { generateParamFileName, getMsgDispatcherCommand, getLogDir, getExperimentRootDir,
/**
 * Get the temporary directory path for a remote OS type.
 *
 * @param osType OS identifier; only 'linux' is handled
 * @returns '/tmp' when osType is 'linux'
 * @throws Error for any other osType
 */
function getRemoteTmpDir(osType: string): string {
    // Strict equality: avoid the implicit type coercion performed by `==`.
    if (osType === 'linux') {
        return '/tmp';
    }

    throw Error(`remote OS ${osType} not supported`);
}

/**
 * Map the isEarlyStopped hint to the status recorded for a canceled job.
 *
 * @param isEarlyStopped hint telling whether the job was early stopped
 * @returns 'EARLY_STOPPED' when the hint is true, otherwise 'USER_CANCELED'
 */
function getJobCancelStatus(isEarlyStopped: boolean): TrialJobStatus {
    if (isEarlyStopped) {
        return 'EARLY_STOPPED';
    }

    return 'USER_CANCELED';
}

export {getRemoteTmpDir, generateParamFileName, getMsgDispatcherCommand, getLogDir, getExperimentRootDir, getJobCancelStatus,
getDefaultDatabaseDir, getIPV4Address, mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect };
3 changes: 3 additions & 0 deletions src/nni_manager/core/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const ADD_CUSTOMIZED_TRIAL_JOB = 'AD';
const TRIAL_END = 'EN';
const TERMINATE = 'TE';

const INITIALIZED = 'ID';
const NEW_TRIAL_JOB = 'TR';
const SEND_TRIAL_JOB_PARAMETER = 'SP';
const NO_MORE_TRIAL_JOBS = 'NO';
Expand All @@ -39,6 +40,7 @@ const TUNER_COMMANDS: Set<string> = new Set([
ADD_CUSTOMIZED_TRIAL_JOB,
TERMINATE,

INITIALIZED,
NEW_TRIAL_JOB,
SEND_TRIAL_JOB_PARAMETER,
NO_MORE_TRIAL_JOBS
Expand All @@ -61,6 +63,7 @@ export {
ADD_CUSTOMIZED_TRIAL_JOB,
TRIAL_END,
TERMINATE,
INITIALIZED,
NEW_TRIAL_JOB,
NO_MORE_TRIAL_JOBS,
KILL_TRIAL_JOB,
Expand Down
19 changes: 13 additions & 6 deletions src/nni_manager/core/nniDataStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class NNIDataStore implements DataStore {
}
const map: Map<string, TrialJobInfo> = this.getTrialJobsByReplayEvents(trialJobEvents);

const finalMetricsMap: Map<string, MetricDataRecord> = await this.getFinalMetricData(trialJobId);
const finalMetricsMap: Map<string, MetricDataRecord[]> = await this.getFinalMetricData(trialJobId);

for (const key of map.keys()) {
const jobInfo: TrialJobInfo | undefined = map.get(key);
Expand All @@ -181,17 +181,23 @@ class NNIDataStore implements DataStore {
return result;
}

private async getFinalMetricData(trialJobId?: string): Promise<Map<string, MetricDataRecord>> {
const map: Map<string, MetricDataRecord> = new Map();
private async getFinalMetricData(trialJobId?: string): Promise<Map<string, MetricDataRecord[]>> {
const map: Map<string, MetricDataRecord[]> = new Map();
const metrics: MetricDataRecord[] = await this.getMetricData(trialJobId, 'FINAL');

const multiPhase: boolean = await this.isMultiPhase();

for (const metric of metrics) {
if (map.has(metric.trialJobId) && !multiPhase) {
this.log.error(`Found multiple FINAL results for trial job ${trialJobId}`);
const existMetrics: MetricDataRecord[] | undefined = map.get(metric.trialJobId);
if (existMetrics !== undefined) {
if (!multiPhase) {
this.log.error(`Found multiple FINAL results for trial job ${trialJobId}, metrics: ${JSON.stringify(metrics)}`);
} else {
existMetrics.push(metric);
}
} else {
map.set(metric.trialJobId, [metric]);
}
map.set(metric.trialJobId, metric);
}

return map;
Expand Down Expand Up @@ -271,6 +277,7 @@ class NNIDataStore implements DataStore {
case 'FAILED':
case 'USER_CANCELED':
case 'SYS_CANCELED':
case 'EARLY_STOPPED':
if (record.logPath !== undefined) {
jobInfo.logPath = record.logPath;
}
Expand Down
Loading

0 comments on commit 10e998f

Please sign in to comment.