Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Update ci #175

Merged
merged 27 commits into from
Oct 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/trials/ga_squad/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,4 @@ Every model configuration will has a "layers" section, which is a JSON list of l
* `input_size` is the number of inputs the layer has.
* `input` is the indices of layers taken as input of this layer.
* `output` is the indices of the layers that use this layer's output as their input.
* `is_delete` means whether the layer is still available.
* `is_delete` means whether the layer is still available.
1 change: 1 addition & 0 deletions src/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ class NNIManager implements Manager {
suspendStartTime = Date.now();
}
this.status.status = 'SUSPENDED';
this.log.info('Experiment suspended.');
} else {
if (this.status.status === 'SUSPENDED') {
assert(suspendStartTime !== 0);
Expand Down
5 changes: 5 additions & 0 deletions test/naive/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
__pycache__

tuner_search_space.json
tuner_result.txt
assessor_result.txt
19 changes: 19 additions & 0 deletions test/naive/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## Usage
To test before installing:
./run.py --preinstall
To test the integrity of installation:
./run.py
It will print `PASS` in green eventually if everything works well.

## Details
* This test case tests the communication between trials and tuner/assessor.
* Each naive trial receives an integer `x` as its parameter and reports `x`, `x²`, `x³`, ... , `x¹⁰` as metrics.
* The naive tuner simply generates the sequence of natural numbers and prints the received metrics to `tuner_result.txt`.
* The naive assessor kills trials when `sum(metrics) % 11 == 1` and prints the killed trials to `assessor_result.txt`.
* When the tuner or the assessor exits with an exception, it appends `ERROR` to its corresponding result file.
* When the experiment is suspended (which in this test case means it has finished successfully), the message `Experiment suspended` can be found in the `nnimanager.log` file.

## Issues
* Private APIs are used to detect whether tuner and assessor have terminated successfully.
* The output of REST server is not tested.
* Remote machine training service is not tested.
1 change: 0 additions & 1 deletion test/naive/expected_assessor_result.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,3 @@
5 3
7 2
8 3
DONE
1 change: 0 additions & 1 deletion test/naive/expected_tuner_result.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
6 60466176
9 3486784401
10 10000000000
DONE
6 changes: 4 additions & 2 deletions test/naive/naive_assessor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
import os

from nni.assessor import Assessor, AssessResult

_logger = logging.getLogger('NaiveAssessor')
_logger.info('start')
_result = open('/tmp/nni_assessor_result.txt', 'w')

_pwd = os.path.dirname(__file__)
_result = open(os.path.join(_pwd, 'assessor_result.txt'), 'w')

class NaiveAssessor(Assessor):
def __init__(self, optimize_mode):
Expand All @@ -30,7 +33,6 @@ def assess_trial(self, trial_job_id, trial_history):
return AssessResult.Good

def _on_exit(self):
_result.write('DONE\n')
_result.close()

def _on_error(self):
Expand Down
8 changes: 5 additions & 3 deletions test/naive/naive_tuner.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import json
import logging
import os

from nni.tuner import Tuner

_logger = logging.getLogger('NaiveTuner')
_logger.info('start')
_result = open('/tmp/nni_tuner_result.txt', 'w')

_pwd = os.path.dirname(__file__)
_result = open(os.path.join(_pwd, 'tuner_result.txt'), 'w')

class NaiveTuner(Tuner):
def __init__(self, optimize_mode):
Expand All @@ -24,11 +27,10 @@ def receive_trial_result(self, parameter_id, parameters, reward):

def update_search_space(self, search_space):
_logger.info('update_search_space: %s' % search_space)
with open('/tmp/nni_tuner_search_space.json', 'w') as file_:
with open(os.path.join(_pwd, 'tuner_search_space.json'), 'w') as file_:
json.dump(search_space, file_)

def _on_exit(self):
_result.write('DONE\n')
_result.close()

def _on_error(self):
Expand Down
147 changes: 93 additions & 54 deletions test/naive/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,82 +4,121 @@
import json
import os
import subprocess
import requests
import sys
import time
import traceback

GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'

def read_last_line(file_name):
    """Return the last line of *file_name* with surrounding whitespace removed.

    Returns None when the file does not exist or contains no lines.
    """
    try:
        # Open through a context manager so the handle is closed on every
        # call; a bare ``open()`` here would leave it to the garbage collector.
        with open(file_name) as file_:
            *_, last_line = file_
    except (FileNotFoundError, ValueError):
        # ValueError comes from the star-unpacking when the file is empty.
        return None
    return last_line.strip()

def run():
os.environ['PATH'] = os.environ['PATH'] + ':' + os.environ['PWD']

with contextlib.suppress(FileNotFoundError):
os.remove('tuner_search_space.txt')
with contextlib.suppress(FileNotFoundError):
os.remove('tuner_result.txt')
with contextlib.suppress(FileNotFoundError):
os.remove('/tmp/nni_assessor_result.txt')

proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

print('Spawning trials...')
current_trial = 0

for _ in range(60):
class Integration_test():
def __init__(self):
self.experiment_url = 'http://localhost:51188/api/v1/nni/experiment'
self.experiment_id = None
self.experiment_suspended_signal = '"Experiment suspended"'

def read_last_line(self, file_name):
    """Return the last line of *file_name* with surrounding whitespace removed.

    Returns None when the file does not exist or contains no lines.
    """
    try:
        # Open through a context manager so the handle is closed on every
        # call; a bare ``open()`` here would leave it to the garbage collector.
        with open(file_name) as file_:
            *_, last_line = file_
    except (FileNotFoundError, ValueError):
        # ValueError comes from the star-unpacking when the file is empty.
        return None
    return last_line.strip()

def fetch_experiment_config(self):
    """Query the experiment REST endpoint and record where its files live.

    Sets ``experiment_id`` from the ``id`` field of the JSON returned by
    ``self.experiment_url``, then derives ``experiment_path`` (under
    ``$HOME/nni/experiments``) and ``nnimanager_log_path`` from it.
    """
    response = requests.get(self.experiment_url)
    profile = json.loads(response.text)
    self.experiment_id = profile['id']

    experiments_root = os.path.join(os.environ['HOME'], 'nni/experiments')
    self.experiment_path = os.path.join(experiments_root, self.experiment_id)

    log_dir = os.path.join(self.experiment_path, 'log')
    self.nnimanager_log_path = os.path.join(log_dir, 'nnimanager.log')

def check_experiment_status(self):
    """Return True if the suspended marker appears in the NNI manager log.

    The log at ``self.nnimanager_log_path`` is scanned line by line for
    ``self.experiment_suspended_signal``.

    Raises:
        AssertionError: if the log file does not exist.
    """
    assert os.path.exists(self.nnimanager_log_path), 'Experiment starts failed'

    # The signal is stored wrapped in quotes because it used to be pasted into
    # a shell pipeline; drop them so the text itself is what gets searched for.
    marker = self.experiment_suspended_signal.strip('"\'')

    # Scan the file in-process instead of running `cat ... | grep ...` through
    # a shell: that broke on paths containing spaces or shell metacharacters
    # and echoed every matching line to stdout on each poll.
    with open(self.nnimanager_log_path, errors='replace') as log_file:
        return any(marker in line for line in log_file)

def remove_files(self, file_list):
    """Delete each path in *file_list*, skipping any that do not exist."""
    for stale_path in file_list:
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            # Already absent, so there is nothing to clean up for this entry.
            continue

def run(self, installed = True):
if not installed:
os.environ['PATH'] = os.environ['PATH'] + ':' + os.environ['PWD']
sdk_path = os.path.abspath('../../src/sdk/pynni')
cmd_path = os.path.abspath('../../tools')
pypath = os.environ.get('PYTHONPATH')
if pypath:
pypath = ':'.join([pypath, sdk_path, cmd_path])
else:
pypath = ':'.join([sdk_path, cmd_path])
os.environ['PYTHONPATH'] = pypath

to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt']
self.remove_files(to_remove)

proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

print('Spawning trials...')
time.sleep(1)
self.fetch_experiment_config()
current_trial = 0

for _ in range(60):
time.sleep(1)

tuner_status = read_last_line('/tmp/nni_tuner_result.txt')
assessor_status = read_last_line('/tmp/nni_assessor_result.txt')
tuner_status = self.read_last_line('tuner_result.txt')
assessor_status = self.read_last_line('assessor_result.txt')
experiment_status = self.check_experiment_status()

assert tuner_status != 'ERROR', 'Tuner exited with error'
assert assessor_status != 'ERROR', 'Assessor exited with error'
assert tuner_status != 'ERROR', 'Tuner exited with error'
assert assessor_status != 'ERROR', 'Assessor exited with error'

if tuner_status == 'DONE' and assessor_status == 'DONE':
break
if experiment_status:
break

if tuner_status is not None:
for line in open('/tmp/nni_tuner_result.txt'):
if line.strip() in ('DONE', 'ERROR'):
break
trial = int(line.split(' ')[0])
if trial > current_trial:
current_trial = trial
print('Trial #%d done' % trial)
if tuner_status is not None:
for line in open('tuner_result.txt'):
if line.strip() == 'ERROR':
break
trial = int(line.split(' ')[0])
if trial > current_trial:
current_trial = trial
print('Trial #%d done' % trial)

assert tuner_status == 'DONE' and assessor_status == 'DONE', 'Failed to finish in 1 min'
assert experiment_status, 'Failed to finish in 1 min'

ss1 = json.load(open('search_space.json'))
ss2 = json.load(open('/tmp/nni_tuner_search_space.json'))
assert ss1 == ss2, 'Tuner got wrong search space'
ss1 = json.load(open('search_space.json'))
ss2 = json.load(open('tuner_search_space.json'))
assert ss1 == ss2, 'Tuner got wrong search space'

tuner_result = set(open('/tmp/nni_tuner_result.txt'))
expected = set(open('expected_tuner_result.txt'))
# Trials may complete before NNI gets assessor's result,
# so it is possible to have more final result than expected
assert tuner_result.issuperset(expected), 'Bad tuner result'
# Waiting for naive_trial to report_final_result
time.sleep(2)
tuner_result = set(open('tuner_result.txt'))
expected = set(open('expected_tuner_result.txt'))
# Trials may complete before NNI gets assessor's result,
# so it is possible to have more final result than expected
assert tuner_result.issuperset(expected), 'Bad tuner result'

assessor_result = set(open('/tmp/nni_assessor_result.txt'))
expected = set(open('expected_assessor_result.txt'))
assert assessor_result == expected, 'Bad assessor result'
assessor_result = set(open('assessor_result.txt'))
expected = set(open('expected_assessor_result.txt'))
assert assessor_result == expected, 'Bad assessor result'

if __name__ == '__main__':
installed = (sys.argv[-1] != '--preinstall')

ic = Integration_test()
try:
run()
ic.run(installed)
# TODO: check the output of rest server
print(GREEN + 'PASS' + CLEAR)
except Exception as error:
print(RED + 'FAIL' + CLEAR)
print('%r' % error)
traceback.print_exc()
raise error

subprocess.run(['nnictl', 'stop', '--port', '51188'])
sys.exit(1)
finally:
subprocess.run(['nnictl', 'stop'])
2 changes: 2 additions & 0 deletions tools/nnicmd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def set_pai_config(experiment_config, port):
if not response or not response.status_code == 200:
if response is not None:
err_message = response.text
with open(STDERR_FULL_PATH, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message

#set trial_config
Expand Down