From 4b2c56d4509511e8cd3521cccc52f873d3c15f56 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 31 Oct 2019 19:22:53 +0800 Subject: [PATCH 01/18] updates (#1672) --- azure-pipelines.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 336d2375b8..59b05edd3f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,8 +8,15 @@ jobs: PYTHON_VERSION: '3.6' steps: - - script: python3 -m pip install --upgrade pip setuptools --user + - script: | + python3 -m pip install --upgrade pip setuptools --user + python3 -m pip install pylint==2.3.1 --user displayName: 'Install python tools' + - script: | + python3 -m pylint --rcfile pylintrc tools + python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli + python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni + displayName: 'Run pylint' - script: | python3 -m pip install torch==0.4.1 --user python3 -m pip install torchvision==0.2.1 --user From f9d2c3b59902e2f43555a9c36999fae8033c69fa Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Thu, 31 Oct 2019 19:26:21 +0800 Subject: [PATCH 02/18] Check pylint for nni_cmd (#1683) * check pylint for nni_cmd * fix id error --- tools/nni_cmd/command_utils.py | 2 +- tools/nni_cmd/common_utils.py | 10 +-- tools/nni_cmd/config_schema.py | 104 ++++++++++++++-------------- tools/nni_cmd/config_utils.py | 31 ++++----- tools/nni_cmd/constants.py | 5 +- tools/nni_cmd/launcher.py | 46 ++++++------ tools/nni_cmd/launcher_utils.py | 20 +++--- tools/nni_cmd/nnictl.py | 16 +++-- tools/nni_cmd/nnictl_utils.py | 66 +++++++++--------- tools/nni_cmd/package_management.py | 6 +- tools/nni_cmd/ssh_utils.py | 6 +- tools/nni_cmd/tensorboard_utils.py | 25 +++---- tools/nni_cmd/updater.py | 2 +- tools/nni_cmd/url_utils.py | 6 +- 14 files changed, 172 insertions(+), 173 deletions(-) diff --git a/tools/nni_cmd/command_utils.py b/tools/nni_cmd/command_utils.py index a3bcb81965..cf13f63eae 100644 --- a/tools/nni_cmd/command_utils.py +++ b/tools/nni_cmd/command_utils.py @@ -3,7 +3,7 @@ import os import signal import psutil -from .common_utils import print_error, print_normal, print_warning +from .common_utils import print_error def check_output_command(file_path, head=None, tail=None): diff --git a/tools/nni_cmd/common_utils.py b/tools/nni_cmd/common_utils.py index 3a5e909ca2..af0fe3efa6 100644 --- a/tools/nni_cmd/common_utils.py +++ b/tools/nni_cmd/common_utils.py @@ -21,10 +21,10 @@ import os import sys import json -import ruamel.yaml as yaml -import psutil import socket from pathlib import Path +import ruamel.yaml as yaml +import psutil from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT def get_yml_content(file_path): @@ -34,6 +34,7 @@ def get_yml_content(file_path): return yaml.load(file, Loader=yaml.Loader) except yaml.scanner.ScannerError as err: print_error('yaml file format error!') + print_error(err) exit(1) except Exception as exception: print_error(exception) @@ -46,6 +47,7 @@ def get_json_content(file_path): return json.load(file) except TypeError as err: print_error('json file format error!') + print_error(err) return None def print_error(content): @@ -70,7 +72,7 @@ def detect_process(pid): def detect_port(port): '''Detect if the port is used''' - socket_test = socket.socket(socket.AF_INET,socket.SOCK_STREAM) + socket_test = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: socket_test.connect(('127.0.0.1', int(port))) socket_test.close() @@ -79,7 +81,7 @@ def detect_port(port): return False def get_user(): - if sys.platform =='win32': + if sys.platform == 'win32': return os.environ['USERNAME'] else: return os.environ['USER'] diff --git a/tools/nni_cmd/config_schema.py b/tools/nni_cmd/config_schema.py index da943564fb..dded8d1e95 100644 --- a/tools/nni_cmd/config_schema.py +++ b/tools/nni_cmd/config_schema.py @@ -19,13 +19,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import os -from schema import Schema, And, Use, Optional, Regex, Or +from schema import Schema, And, Optional, Regex, Or from .constants import SCHEMA_TYPE_ERROR, SCHEMA_RANGE_ERROR, SCHEMA_PATH_ERROR -def setType(key, type): +def setType(key, valueType): '''check key type''' - return And(type, error=SCHEMA_TYPE_ERROR % (key, type.__name__)) + return And(valueType, error=SCHEMA_TYPE_ERROR % (key, valueType.__name__)) def setChoice(key, *args): '''check choice''' @@ -47,7 +47,7 @@ def setPathCheck(key): 'experimentName': setType('experimentName', str), Optional('description'): setType('description', str), 'trialConcurrency': setNumberRange('trialConcurrency', int, 1, 99999), - Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[s|m|h|d]$',error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')), + Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[s|m|h|d]$', error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')), Optional('maxTrialNum'): setNumberRange('maxTrialNum', int, 1, 99999), 'trainingServicePlatform': setChoice('trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller'), Optional('searchSpacePath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'searchSpacePath'), @@ -106,7 +106,7 @@ def setPathCheck(key): 'builtinTunerName': 'NetworkMorphism', Optional('classArgs'): { Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'), - Optional('task'): setChoice('task', 'cv','nlp','common'), + Optional('task'): setChoice('task', 'cv', 'nlp', 'common'), Optional('input_width'): setType('input_width', int), Optional('input_channel'): setType('input_channel', int), Optional('n_output_node'): setType('n_output_node', int), @@ -139,7 +139,7 @@ def setPathCheck(key): Optional('selection_num_warm_up'): setType('selection_num_warm_up', int), Optional('selection_num_starting_points'): setType('selection_num_starting_points', int), }, - Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), + Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool), Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), }, 'PPOTuner': { @@ -232,35 +232,35 @@ def setPathCheck(key): } common_trial_schema = { -'trial':{ - 'command': setType('command', str), - 'codeDir': setPathCheck('codeDir'), - Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999), - Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode') + 'trial':{ + 'command': setType('command', str), + 'codeDir': setPathCheck('codeDir'), + Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999), + Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode') } } pai_trial_schema = { -'trial':{ - 'command': setType('command', str), - 'codeDir': setPathCheck('codeDir'), - 'gpuNum': setNumberRange('gpuNum', int, 0, 99999), - 'cpuNum': setNumberRange('cpuNum', int, 0, 99999), - 'memoryMB': setType('memoryMB', int), - 'image': setType('image', str), - Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'), - Optional('shmMB'): setType('shmMB', int), - Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\ - error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'), - Optional('outputDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\ - error='ERROR: outputDir format error, outputDir format is hdfs://xxx.xxx.xxx.xxx:xxx'), - Optional('virtualCluster'): setType('virtualCluster', str), - Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode'), - Optional('portList'): [{ - "label": setType('label', str), - "beginAt": setType('beginAt', int), - "portNumber": setType('portNumber', int) - }] + 'trial':{ + 'command': setType('command', str), + 'codeDir': setPathCheck('codeDir'), + 'gpuNum': setNumberRange('gpuNum', int, 0, 99999), + 'cpuNum': setNumberRange('cpuNum', int, 0, 99999), + 'memoryMB': setType('memoryMB', int), + 'image': setType('image', str), + Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'), + Optional('shmMB'): setType('shmMB', int), + Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\ + error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'), + Optional('outputDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\ + error='ERROR: outputDir format error, outputDir format is hdfs://xxx.xxx.xxx.xxx:xxx'), + Optional('virtualCluster'): setType('virtualCluster', str), + Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode'), + Optional('portList'): [{ + "label": setType('label', str), + "beginAt": setType('beginAt', int), + "portNumber": setType('portNumber', int) + }] } } @@ -273,7 +273,7 @@ def setPathCheck(key): } kubeflow_trial_schema = { -'trial':{ + 'trial':{ 'codeDir': setPathCheck('codeDir'), Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode'), Optional('ps'): { @@ -315,7 +315,7 @@ def setPathCheck(key): 'server': setType('server', str), 'path': setType('path', str) } - },{ + }, { 'operator': setChoice('operator', 'tf-operator', 'pytorch-operator'), 'apiVersion': setType('apiVersion', str), Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'), @@ -363,7 +363,7 @@ def setPathCheck(key): 'server': setType('server', str), 'path': setType('path', str) } - },{ + }, { Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'), Optional('serviceAccountName'): setType('serviceAccountName', str), 'keyVault': { @@ -383,24 +383,24 @@ def setPathCheck(key): } machine_list_schema = { -Optional('machineList'):[Or({ - 'ip': setType('ip', str), - Optional('port'): setNumberRange('port', int, 1, 65535), - 'username': setType('username', str), - 'passwd': setType('passwd', str), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), - Optional('useActiveGpu'): setType('useActiveGpu', bool) - },{ - 'ip': setType('ip', str), - Optional('port'): setNumberRange('port', int, 1, 65535), - 'username': setType('username', str), - 'sshKeyPath': setPathCheck('sshKeyPath'), - Optional('passphrase'): setType('passphrase', str), - Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), - Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), - Optional('useActiveGpu'): setType('useActiveGpu', bool) -})] + Optional('machineList'):[Or({ + 'ip': setType('ip', str), + Optional('port'): setNumberRange('port', int, 1, 65535), + 'username': setType('username', str), + 'passwd': setType('passwd', str), + Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), + Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), + Optional('useActiveGpu'): setType('useActiveGpu', bool) + }, { + 'ip': setType('ip', str), + Optional('port'): setNumberRange('port', int, 1, 65535), + 'username': setType('username', str), + 'sshKeyPath': setPathCheck('sshKeyPath'), + Optional('passphrase'): setType('passphrase', str), + Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), + Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), + Optional('useActiveGpu'): setType('useActiveGpu', bool) + })] } LOCAL_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema}) diff --git a/tools/nni_cmd/config_utils.py b/tools/nni_cmd/config_utils.py index 6b2b8a0cc0..c7c88bcf3e 100644 --- a/tools/nni_cmd/config_utils.py +++ b/tools/nni_cmd/config_utils.py @@ -21,7 +21,6 @@ import os import json -import shutil from .constants import NNICTL_HOME_DIR class Config: @@ -73,29 +72,29 @@ def __init__(self): self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment') self.experiments = self.read_file() - def add_experiment(self, id, port, time, file_name, platform): + def add_experiment(self, expId, port, time, file_name, platform): '''set {key:value} paris to self.experiment''' - self.experiments[id] = {} - self.experiments[id]['port'] = port - self.experiments[id]['startTime'] = time - self.experiments[id]['endTime'] = 'N/A' - self.experiments[id]['status'] = 'INITIALIZED' - self.experiments[id]['fileName'] = file_name - self.experiments[id]['platform'] = platform + self.experiments[expId] = {} + self.experiments[expId]['port'] = port + self.experiments[expId]['startTime'] = time + self.experiments[expId]['endTime'] = 'N/A' + self.experiments[expId]['status'] = 'INITIALIZED' + self.experiments[expId]['fileName'] = file_name + self.experiments[expId]['platform'] = platform self.write_file() - def update_experiment(self, id, key, value): + def update_experiment(self, expId, key, value): '''Update experiment''' - if id not in self.experiments: + if expId not in self.experiments: return False - self.experiments[id][key] = value + self.experiments[expId][key] = value self.write_file() return True - def remove_experiment(self, id): + def remove_experiment(self, expId): '''remove an experiment by id''' if id in self.experiments: - self.experiments.pop(id) + self.experiments.pop(expId) self.write_file() def get_all_experiments(self): @@ -109,7 +108,7 @@ def write_file(self): json.dump(self.experiments, file) except IOError as error: print('Error:', error) - return + return '' def read_file(self): '''load config from local file''' @@ -119,4 +118,4 @@ def read_file(self): return json.load(file) except ValueError: return {} - return {} + return {} diff --git a/tools/nni_cmd/constants.py b/tools/nni_cmd/constants.py index d22a509c46..0777d2db98 100644 --- a/tools/nni_cmd/constants.py +++ b/tools/nni_cmd/constants.py @@ -21,7 +21,7 @@ import os from colorama import Fore -NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl') +NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl') ERROR_INFO = 'ERROR: %s' @@ -58,7 +58,8 @@ '-----------------------------------------------------------------------\n' EXPERIMENT_START_FAILED_INFO = 'There is an experiment running in the port %d, please stop it first or set another port!\n' \ - 'You could use \'nnictl stop --port [PORT]\' command to stop an experiment!\nOr you could use \'nnictl create --config [CONFIG_PATH] --port [PORT]\' to set port!\n' + 'You could use \'nnictl stop --port [PORT]\' command to stop an experiment!\nOr you could ' \ + 'use \'nnictl create --config [CONFIG_PATH] --port [PORT]\' to set port!\n' EXPERIMENT_INFORMATION_FORMAT = '----------------------------------------------------------------------------------------\n' \ ' Experiment information\n' \ diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py index e2fac2cb42..f99f8dfe43 100644 --- a/tools/nni_cmd/launcher.py +++ b/tools/nni_cmd/launcher.py @@ -22,22 +22,21 @@ import json import os import sys -import shutil import string -from subprocess import Popen, PIPE, call, check_output, check_call, CalledProcessError +import random +import site +import time import tempfile +from subprocess import Popen, check_call, CalledProcessError +from nni_annotation import expand_annotations, generate_search_space from nni.constants import ModuleName, AdvisorModuleName -from nni_annotation import * from .launcher_utils import validate_all_content -from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response +from .rest_utils import rest_put, rest_post, check_rest_server, check_response from .url_utils import cluster_metadata_url, experiment_url, get_local_urls from .config_utils import Config, Experiments -from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process, detect_port, get_user, get_python_dir -from .constants import * -import random -import site -import time -from pathlib import Path +from .common_utils import get_yml_content, get_json_content, print_error, print_normal, \ + detect_port, get_user, get_python_dir +from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS from .command_utils import check_output_command, kill_command from .nnictl_utils import update_experiment @@ -83,7 +82,8 @@ def _generate_installation_path(sitepackages_path): python_dir = os.getenv('VIRTUAL_ENV') else: python_sitepackage = site.getsitepackages()[0] - # If system-wide python is used, we will give priority to using `local sitepackage`--"usersitepackages()" given that nni exists there + # If system-wide python is used, we will give priority to using `local sitepackage`--"usersitepackages()" given + # that nni exists there if python_sitepackage.startswith('/usr') or python_sitepackage.startswith('/Library'): python_dir = try_installation_path_sequentially(site.getusersitepackages(), site.getsitepackages()[0]) else: @@ -98,7 +98,6 @@ def _generate_installation_path(sitepackages_path): def start_rest_server(port, platform, mode, config_file_name, experiment_id=None, log_dir=None, log_level=None): '''Run nni manager process''' - nni_config = Config(config_file_name) if detect_port(port): print_error('Port %s is used by another process, please reset the port!\n' \ 'You could use \'nnictl create --help\' to get help information' % port) @@ -114,7 +113,7 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None entry_dir = get_nni_installation_path() entry_file = os.path.join(entry_dir, 'main.js') - + node_command = 'node' if sys.platform == 'win32': node_command = os.path.join(entry_dir[:-3], 'Scripts', 'node.exe') @@ -132,7 +131,7 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None cmds += ['--experiment_id', experiment_id] stdout_full_path, stderr_full_path = get_log_path(config_file_name) with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file: - time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + time_now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) #add time information in the header of log files log_header = LOG_HEADER % str(time_now) stdout_file.write(log_header) @@ -212,7 +211,7 @@ def setNNIManagerIp(experiment_config, port, config_file_name): if experiment_config.get('nniManagerIp') is None: return True, None ip_config_dict = dict() - ip_config_dict['nni_manager_ip'] = { 'nniManagerIp' : experiment_config['nniManagerIp'] } + ip_config_dict['nni_manager_ip'] = {'nniManagerIp': experiment_config['nniManagerIp']} response = rest_put(cluster_metadata_url(port), json.dumps(ip_config_dict), REST_TIME_OUT) err_message = None if not response or not response.status_code == 200: @@ -403,11 +402,12 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen stdout_full_path, stderr_full_path = get_log_path(config_file_name) with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file: check_call([sys.executable, '-c', 'import %s'%(module_name)], stdout=stdout_file, stderr=stderr_file) - except CalledProcessError as e: + except CalledProcessError: print_error('some errors happen when import package %s.' %(package_name)) print_log_content(config_file_name) if package_name in PACKAGE_REQUIREMENTS: - print_error('If %s is not installed, it should be installed through \'nnictl package install --name %s\''%(package_name, package_name)) + print_error('If %s is not installed, it should be installed through '\ + '\'nnictl package install --name %s\''%(package_name, package_name)) exit(1) log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None @@ -416,7 +416,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen if log_level not in ['trace', 'debug'] and (args.debug or experiment_config.get('debug') is True): log_level = 'debug' # start rest server - rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id, log_dir, log_level) + rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], \ + mode, config_file_name, experiment_id, log_dir, log_level) nni_config.set_config('restServerPid', rest_process.pid) # Deal with annotation if experiment_config.get('useAnnotation'): @@ -450,8 +451,9 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen exit(1) if mode != 'view': # set platform configuration - set_platform_config(experiment_config['trainingServicePlatform'], experiment_config, args.port, config_file_name, rest_process) - + set_platform_config(experiment_config['trainingServicePlatform'], experiment_config, args.port,\ + config_file_name, rest_process) + # start a new experiment print_normal('Starting experiment...') # set debug configuration @@ -478,7 +480,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen #save experiment information nnictl_experiment_config = Experiments() - nnictl_experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name, experiment_config['trainingServicePlatform']) + nnictl_experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name,\ + experiment_config['trainingServicePlatform']) print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list))) @@ -503,7 +506,6 @@ def manage_stopped_experiment(args, mode): experiment_config = Experiments() experiment_dict = experiment_config.get_all_experiments() experiment_id = None - experiment_endTime = None #find the latest stopped experiment if not args.id: print_error('Please set experiment id! \nYou could use \'nnictl {0} {id}\' to {0} a stopped experiment!\n' \ diff --git a/tools/nni_cmd/launcher_utils.py b/tools/nni_cmd/launcher_utils.py index da6a668064..f6c849abab 100644 --- a/tools/nni_cmd/launcher_utils.py +++ b/tools/nni_cmd/launcher_utils.py @@ -20,11 +20,11 @@ import os import json -from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA, FRAMEWORKCONTROLLER_CONFIG_SCHEMA, \ -tuner_schema_dict, advisor_schema_dict, assessor_schema_dict -from schema import SchemaMissingKeyError, SchemaForbiddenKeyError, SchemaUnexpectedTypeError, SchemaWrongKeyError, SchemaError -from .common_utils import get_json_content, print_error, print_warning, print_normal -from schema import Schema, And, Use, Optional, Regex, Or +from schema import SchemaError +from schema import Schema +from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA,\ + FRAMEWORKCONTROLLER_CONFIG_SCHEMA, tuner_schema_dict, advisor_schema_dict, assessor_schema_dict +from .common_utils import print_error, print_warning, print_normal def expand_path(experiment_config, key): '''Change '~' to user home directory''' @@ -164,11 +164,11 @@ def validate_common_content(experiment_config): print_error('Please set correct trainingServicePlatform!') exit(1) schema_dict = { - 'local': LOCAL_CONFIG_SCHEMA, - 'remote': REMOTE_CONFIG_SCHEMA, - 'pai': PAI_CONFIG_SCHEMA, - 'kubeflow': KUBEFLOW_CONFIG_SCHEMA, - 'frameworkcontroller': FRAMEWORKCONTROLLER_CONFIG_SCHEMA + 'local': LOCAL_CONFIG_SCHEMA, + 'remote': REMOTE_CONFIG_SCHEMA, + 'pai': PAI_CONFIG_SCHEMA, + 'kubeflow': KUBEFLOW_CONFIG_SCHEMA, + 'frameworkcontroller': FRAMEWORKCONTROLLER_CONFIG_SCHEMA } separate_schema_dict = { 'tuner': tuner_schema_dict, diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py index 8da30fdfb7..88ee311423 100644 --- a/tools/nni_cmd/nnictl.py +++ b/tools/nni_cmd/nnictl.py @@ -20,14 +20,18 @@ import argparse +import os import pkg_resources +from colorama import init +from .common_utils import print_error from .launcher import create_experiment, resume_experiment, view_experiment from .updater import update_searchspace, update_concurrency, update_duration, update_trialnum, import_data -from .nnictl_utils import * -from .package_management import * -from .constants import * -from .tensorboard_utils import * -from colorama import init +from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\ + log_trial, experiment_clean, platform_clean, experiment_list, \ + monitor_experiment, export_trials_data, trial_codegen, webui_url, get_config, log_stdout, log_stderr +from .package_management import package_install, package_show +from .constants import DEFAULT_REST_PORT +from .tensorboard_utils import start_tensorboard, stop_tensorboard init(autoreset=True) if os.environ.get('COVERAGE_PROCESS_START'): @@ -38,7 +42,7 @@ def nni_info(*args): if args[0].version: try: print(pkg_resources.get_distribution('nni').version) - except pkg_resources.ResolutionError as err: + except pkg_resources.ResolutionError: print_error('Get version failed, please use `pip3 list | grep nni` to check nni version!') else: print('please run "nnictl {positional argument} --help" to see nnictl guidance') diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py index b6fada56e8..4cadce182d 100644 --- a/tools/nni_cmd/nnictl_utils.py +++ b/tools/nni_cmd/nnictl_utils.py @@ -20,15 +20,13 @@ import csv import os -import psutil import json -from datetime import datetime, timezone import time import re -from pathlib import Path -from pyhdfs import HdfsClient, HdfsFileNotFoundException import shutil -from subprocess import call, check_output +from datetime import datetime, timezone +from pathlib import Path +from pyhdfs import HdfsClient from nni_annotation import expand_annotations from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url @@ -102,7 +100,8 @@ def check_experiment_id(args, update=True): experiment_information = "" for key in running_experiment_list: experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ - experiment_dict[key]['port'], experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], experiment_dict[key]['endTime'])) + experiment_dict[key]['port'], experiment_dict[key].get('platform'), experiment_dict[key]['startTime'],\ + experiment_dict[key]['endTime'])) print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) exit(1) elif not running_experiment_list: @@ -157,23 +156,24 @@ def parse_ids(args): experiment_information = "" for key in running_experiment_list: experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ - experiment_dict[key]['port'], experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], experiment_dict[key]['endTime'])) + experiment_dict[key]['port'], experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], \ + experiment_dict[key]['endTime'])) print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) exit(1) else: result_list = running_experiment_list elif args.id.endswith('*'): - for id in running_experiment_list: - if id.startswith(args.id[:-1]): - result_list.append(id) + for expId in running_experiment_list: + if expId.startswith(args.id[:-1]): + result_list.append(expId) elif args.id in running_experiment_list: result_list.append(args.id) else: - for id in running_experiment_list: - if id.startswith(args.id): - result_list.append(id) + for expId in running_experiment_list: + if expId.startswith(args.id): + result_list.append(expId) if len(result_list) > 1: - print_error(args.id + ' is ambiguous, please choose ' + ' '.join(result_list) ) + print_error(args.id + ' is ambiguous, please choose ' + ' '.join(result_list)) return None if not result_list and (args.id or args.port): print_error('There are no experiments matched, please set correct experiment id or restful server port') @@ -235,7 +235,6 @@ def stop_experiment(args): for experiment_id in experiment_id_list: print_normal('Stoping experiment %s' % experiment_id) nni_config = Config(experiment_dict[experiment_id]['fileName']) - rest_port = nni_config.get_config('restServerPort') rest_pid = nni_config.get_config('restServerPid') if rest_pid: kill_command(rest_pid) @@ -249,7 +248,7 @@ def stop_experiment(args): nni_config.set_config('tensorboardPidList', []) print_normal('Stop experiment success.') experiment_config.update_experiment(experiment_id, 'status', 'STOPPED') - time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + time_now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) experiment_config.update_experiment(experiment_id, 'endTime', str(time_now)) def trial_ls(args): @@ -401,9 +400,9 @@ def local_clean(directory): print_normal('removing folder {0}'.format(directory)) try: shutil.rmtree(directory) - except FileNotFoundError as err: + except FileNotFoundError: print_error('{0} does not exist.'.format(directory)) - + def remote_clean(machine_list, experiment_id=None): '''clean up remote data''' for machine in machine_list: @@ -418,7 +417,7 @@ def remote_clean(machine_list, experiment_id=None): sftp = create_ssh_sftp_client(host, port, userName, passwd) print_normal('removing folder {0}'.format(host + ':' + str(port) + remote_dir)) remove_remote_directory(sftp, remote_dir) - + def hdfs_clean(host, user_name, output_dir, experiment_id=None): '''clean up hdfs data''' hdfs_client = HdfsClient(hosts='{0}:80'.format(host), user_name=user_name, webhdfs_path='/webhdfs/api/v1', timeout=5) @@ -475,7 +474,7 @@ def experiment_clean(args): machine_list = nni_config.get_config('experimentConfig').get('machineList') remote_clean(machine_list, experiment_id) elif platform == 'pai': - host = nni_config.get_config('experimentConfig').get('paiConfig').get('host') + host = nni_config.get_config('experimentConfig').get('paiConfig').get('host') user_name = nni_config.get_config('experimentConfig').get('paiConfig').get('userName') output_dir = nni_config.get_config('experimentConfig').get('trial').get('outputDir') hdfs_clean(host, user_name, output_dir, experiment_id) @@ -492,7 +491,7 @@ def experiment_clean(args): experiment_config = Experiments() print_normal('removing metadata of experiment {0}'.format(experiment_id)) experiment_config.remove_experiment(experiment_id) - print_normal('Done.') + print_normal('Done.') def get_platform_dir(config_content): '''get the dir list to be deleted''' @@ -505,8 +504,7 @@ def get_platform_dir(config_content): port = machine.get('port') dir_list.append(host + ':' + str(port) + '/tmp/nni') elif platform == 'pai': - pai_config = config_content.get('paiConfig') - host = config_content.get('paiConfig').get('host') + host = config_content.get('paiConfig').get('host') user_name = config_content.get('paiConfig').get('userName') output_dir = config_content.get('trial').get('outputDir') dir_list.append('server: {0}, path: {1}/nni'.format(host, user_name)) @@ -529,17 +527,15 @@ def platform_clean(args): print_normal('platform {0} not supported.'.format(platform)) exit(0) update_experiment() - experiment_config = Experiments() - experiment_dict = experiment_config.get_all_experiments() - id_list = list(experiment_dict.keys()) dir_list = get_platform_dir(config_content) if not dir_list: print_normal('No folder of NNI caches is found.') exit(1) while True: - print_normal('This command will remove below folders of NNI caches. If other users are using experiments on below hosts, it will be broken.') - for dir in dir_list: - print(' ' + dir) + print_normal('This command will remove below folders of NNI caches. If other users are using experiments' \ + ' on below hosts, it will be broken.') + for value in dir_list: + print(' ' + value) inputs = input('INFO: do you want to continue?[y/N]:') if not inputs.lower() or inputs.lower() in ['n', 'no']: exit(0) @@ -549,11 +545,9 @@ def platform_clean(args): break if platform == 'remote': machine_list = config_content.get('machineList') - for machine in machine_list: - remote_clean(machine_list, None) + remote_clean(machine_list, None) elif platform == 'pai': - pai_config = config_content.get('paiConfig') - host = config_content.get('paiConfig').get('host') + host = config_content.get('paiConfig').get('host') user_name = config_content.get('paiConfig').get('userName') output_dir = config_content.get('trial').get('outputDir') hdfs_clean(host, user_name, output_dir, None) @@ -618,7 +612,8 @@ def show_experiment_info(): return for key in experiment_id_list: print(EXPERIMENT_MONITOR_INFO % (key, experiment_dict[key]['status'], experiment_dict[key]['port'], \ - experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], get_time_interval(experiment_dict[key]['startTime'], experiment_dict[key]['endTime']))) + experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], \ + get_time_interval(experiment_dict[key]['startTime'], experiment_dict[key]['endTime']))) print(TRIAL_MONITOR_HEAD) running, response = check_rest_server_quick(experiment_dict[key]['port']) if running: @@ -627,7 +622,8 @@ def show_experiment_info(): content = json.loads(response.text) for index, value in enumerate(content): content[index] = convert_time_stamp_to_date(value) - print(TRIAL_MONITOR_CONTENT % (content[index].get('id'), content[index].get('startTime'), content[index].get('endTime'), content[index].get('status'))) + print(TRIAL_MONITOR_CONTENT % (content[index].get('id'), content[index].get('startTime'), \ + content[index].get('endTime'), content[index].get('status'))) print(TRIAL_MONITOR_TAIL) def monitor_experiment(args): diff --git a/tools/nni_cmd/package_management.py b/tools/nni_cmd/package_management.py index de8dbe62ec..32ed79496d 100644 --- a/tools/nni_cmd/package_management.py +++ b/tools/nni_cmd/package_management.py @@ -18,12 +18,10 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import nni import os -import sys -from subprocess import call +import nni from .constants import PACKAGE_REQUIREMENTS -from .common_utils import print_normal, print_error +from .common_utils import print_error from .command_utils import install_requirements_command def process_install(package_name): diff --git a/tools/nni_cmd/ssh_utils.py b/tools/nni_cmd/ssh_utils.py index da707dac48..7453830323 100644 --- a/tools/nni_cmd/ssh_utils.py +++ b/tools/nni_cmd/ssh_utils.py @@ -20,7 +20,6 @@ import os from .common_utils import print_error -from subprocess import call from .command_utils import install_package_command def check_environment(): @@ -29,6 +28,8 @@ def check_environment(): import paramiko except: install_package_command('paramiko') + import paramiko + return paramiko def copy_remote_directory_to_local(sftp, remote_path, local_path): '''copy remote directory to local machine''' @@ -49,8 +50,7 @@ def copy_remote_directory_to_local(sftp, remote_path, local_path): def create_ssh_sftp_client(host_ip, port, username, password): '''create ssh client''' try: - check_environment() - import paramiko + paramiko = check_environment() conn = paramiko.Transport(host_ip, port) conn.connect(username=username, password=password) sftp = paramiko.SFTPClient.from_transport(conn) diff --git a/tools/nni_cmd/tensorboard_utils.py b/tools/nni_cmd/tensorboard_utils.py index b4578c34b0..9646b4de0e 100644 --- a/tools/nni_cmd/tensorboard_utils.py +++ b/tools/nni_cmd/tensorboard_utils.py @@ -19,21 +19,17 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import os -import psutil import json -import datetime -import time -from subprocess import call, check_output, Popen, PIPE -from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response -from .config_utils import Config, Experiments -from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, get_local_urls -from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, COLOR_GREEN_FORMAT -import time -from .common_utils import print_normal, print_error, print_warning, detect_process, detect_port -from .nnictl_utils import * import re -from .ssh_utils import create_ssh_sftp_client, copy_remote_directory_to_local import tempfile +from subprocess import call, Popen +from .rest_utils import rest_get, check_rest_server_quick, check_response +from .config_utils import Config, Experiments +from .url_utils import trial_jobs_url, get_local_urls +from .constants import COLOR_GREEN_FORMAT, REST_TIME_OUT +from .common_utils import print_normal, print_error, detect_process, detect_port +from .nnictl_utils import check_experiment_id, check_experiment_id +from .ssh_utils import create_ssh_sftp_client, copy_remote_directory_to_local def parse_log_path(args, trial_content): '''parse log path''' @@ -43,7 +39,7 @@ def parse_log_path(args, trial_content): if args.trial_id and args.trial_id != 'all' and trial.get('id') != args.trial_id: continue pattern = r'(?P.+)://(?P.+):(?P.*)' - match = re.search(pattern,trial['logPath']) + match = re.search(pattern, trial['logPath']) if match: path_list.append(match.group('path')) host_list.append(match.group('host')) @@ -94,7 +90,8 @@ def start_tensorboard_process(args, nni_config, path_list, temp_nni_path): if detect_port(args.port): print_error('Port %s is used by another process, please reset port!' % str(args.port)) exit(1) - with open(os.path.join(temp_nni_path, 'tensorboard_stdout'), 'a+') as stdout_file, open(os.path.join(temp_nni_path, 'tensorboard_stderr'), 'a+') as stderr_file: + with open(os.path.join(temp_nni_path, 'tensorboard_stdout'), 'a+') as stdout_file, \ + open(os.path.join(temp_nni_path, 'tensorboard_stderr'), 'a+') as stderr_file: cmds = ['tensorboard', '--logdir', format_tensorboard_log_path(path_list), '--port', str(args.port)] tensorboard_process = Popen(cmds, stdout=stdout_file, stderr=stderr_file) url_list = get_local_urls(args.port) diff --git a/tools/nni_cmd/updater.py b/tools/nni_cmd/updater.py index 9258d73f0a..07ae6123cb 100644 --- a/tools/nni_cmd/updater.py +++ b/tools/nni_cmd/updater.py @@ -25,7 +25,7 @@ from .url_utils import experiment_url, import_data_url from .config_utils import Config from .common_utils import get_json_content, print_normal, print_error, print_warning -from .nnictl_utils import check_experiment_id, get_experiment_port, get_config_filename +from .nnictl_utils import get_experiment_port, get_config_filename from .launcher_utils import parse_time from .constants import REST_TIME_OUT, TUNERS_SUPPORTING_IMPORT_DATA, TUNERS_NO_NEED_TO_IMPORT_DATA diff --git a/tools/nni_cmd/url_utils.py b/tools/nni_cmd/url_utils.py index c50b2551d2..05cfa8e66f 100644 --- a/tools/nni_cmd/url_utils.py +++ b/tools/nni_cmd/url_utils.py @@ -18,8 +18,8 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import socket import psutil -from socket import AddressFamily BASE_URL = 'http://localhost' @@ -83,8 +83,8 @@ def tensorboard_url(port): def get_local_urls(port): '''get urls of local machine''' url_list = [] - for name, info in psutil.net_if_addrs().items(): + for _, info in psutil.net_if_addrs().items(): for addr in info: - if AddressFamily.AF_INET == addr.family: + if socket.AddressFamily.AF_INET == addr.family: url_list.append('http://{}:{}'.format(addr.address, port)) return url_list From 3a249ef365b314d0ca54a69332696f0fc5339aec Mon Sep 17 00:00:00 2001 From: Cjkkkk <656569648@qq.com> Date: Thu, 31 Oct 2019 19:28:03 +0800 Subject: [PATCH 03/18] fix tools/nni_gpu_tool pylint (#1680) --- tools/nni_gpu_tool/gpu_metrics_collector.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py index f58b9b2895..7b7e9fca5b 100644 --- a/tools/nni_gpu_tool/gpu_metrics_collector.py +++ b/tools/nni_gpu_tool/gpu_metrics_collector.py @@ -27,7 +27,8 @@ def check_ready_to_run(): if sys.platform == 'win32': - pgrep_output = subprocess.check_output('wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\' and name like \'%python%\'" get processId') + pgrep_output = subprocess.check_output( + 'wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\' and name like \'%python%\'" get processId') pidList = pgrep_output.decode("utf-8").strip().split() pidList.pop(0) # remove the key word 'ProcessId' pidList = list(map(int, pidList)) @@ -69,10 +70,14 @@ def parse_nvidia_smi_result(smi, outputDir): outPut["gpuCount"] = len(gpuList) outPut["gpuInfos"] = [] for gpuIndex, gpu in enumerate(gpuList): - gpuInfo ={} + gpuInfo = {} gpuInfo['index'] = gpuIndex - gpuInfo['gpuUtil'] = gpu.getElementsByTagName('utilization')[0].getElementsByTagName('gpu_util')[0].childNodes[0].data.replace("%", "").strip() - gpuInfo['gpuMemUtil'] = gpu.getElementsByTagName('utilization')[0].getElementsByTagName('memory_util')[0].childNodes[0].data.replace("%", "").strip() + gpuInfo['gpuUtil'] = gpu.getElementsByTagName('utilization')[0]\ + .getElementsByTagName('gpu_util')[0]\ + .childNodes[0].data.replace("%", "").strip() + gpuInfo['gpuMemUtil'] = gpu.getElementsByTagName('utilization')[0]\ + .getElementsByTagName('memory_util')[0]\ + .childNodes[0].data.replace("%", "").strip() processes = gpu.getElementsByTagName('processes') runningProNumber = len(processes[0].getElementsByTagName('process_info')) gpuInfo['activeProcessNum'] = runningProNumber @@ -81,8 +86,8 @@ def parse_nvidia_smi_result(smi, outputDir): print(outPut) outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True))) outputFile.flush(); - except : - e_info = sys.exc_info() + except: + # e_info = sys.exc_info() print('xmldoc paring error') finally: os.umask(old_umask) From e3bf1c5769c2ff55868efcca9aef27b822169115 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 31 Oct 2019 19:28:43 +0800 Subject: [PATCH 04/18] fix pycli pylint (#1671) --- src/sdk/pycli/setup.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/sdk/pycli/setup.py b/src/sdk/pycli/setup.py index 95a6ff2ff1..b85ef7bca8 100644 --- a/src/sdk/pycli/setup.py +++ b/src/sdk/pycli/setup.py @@ -1,18 +1,18 @@ import setuptools setuptools.setup( - name = 'nnicli', - version = '999.0.0-developing', - packages = setuptools.find_packages(), + name='nnicli', + version='999.0.0-developing', + packages=setuptools.find_packages(), - python_requires = '>=3.5', - install_requires = [ + python_requires='>=3.5', + install_requires=[ 'requests' ], - author = 'Microsoft NNI Team', - author_email = 'nni@microsoft.com', - description = 'nnicli for Neural Network Intelligence project', - license = 'MIT', - url = 'https://github.com/Microsoft/nni', + author='Microsoft NNI Team', + author_email='nni@microsoft.com', + description='nnicli for Neural Network Intelligence project', + license='MIT', + url='https://github.com/Microsoft/nni', ) From c5aa919ff67df6f0662d079cbe10634c35815074 Mon Sep 17 00:00:00 2001 From: Tang Lang Date: Thu, 31 Oct 2019 19:29:30 +0800 Subject: [PATCH 05/18] fix-pylint (#1664) --- tools/nni_trial_tool/hdfsClientUtility.py | 4 +- tools/nni_trial_tool/log_utils.py | 6 +- tools/nni_trial_tool/rest_utils.py | 1 - .../test/test_hdfsClientUtility.py | 14 +++-- tools/nni_trial_tool/trial_keeper.py | 61 ++++++++++++------- 5 files changed, 51 insertions(+), 35 deletions(-) diff --git a/tools/nni_trial_tool/hdfsClientUtility.py b/tools/nni_trial_tool/hdfsClientUtility.py index c732d2507c..9369e77a41 100644 --- a/tools/nni_trial_tool/hdfsClientUtility.py +++ b/tools/nni_trial_tool/hdfsClientUtility.py @@ -20,7 +20,6 @@ import os import posixpath -from pyhdfs import HdfsClient from .log_utils import LogType, nni_log def copyHdfsDirectoryToLocal(hdfsDirectory, localDirectory, hdfsClient): @@ -79,7 +78,8 @@ def copyDirectoryToHdfs(localDirectory, hdfsDirectory, hdfsClient): try: result = result and copyDirectoryToHdfs(file_path, hdfs_directory, hdfsClient) except Exception as exception: - nni_log(LogType.Error, 'Copy local directory {0} to hdfs directory {1} error: {2}'.format(file_path, hdfs_directory, str(exception))) + nni_log(LogType.Error, + 'Copy local directory {0} to hdfs directory {1} error: {2}'.format(file_path, hdfs_directory, str(exception))) result = False else: hdfs_file_path = os.path.join(hdfsDirectory, file) diff --git a/tools/nni_trial_tool/log_utils.py b/tools/nni_trial_tool/log_utils.py index 1806b06d79..8b07754ff7 100644 --- a/tools/nni_trial_tool/log_utils.py +++ b/tools/nni_trial_tool/log_utils.py @@ -33,8 +33,7 @@ from queue import Queue -from .rest_utils import rest_get, rest_post, rest_put, rest_delete -from .constants import NNI_EXP_ID, NNI_TRIAL_JOB_ID, STDOUT_API +from .rest_utils import rest_post from .url_utils import gen_send_stdout_url @unique @@ -154,8 +153,7 @@ def _populateQueue(stream, queue): self._is_read_completed = True break - self.pip_log_reader_thread = threading.Thread(target = _populateQueue, - args = (self.pipeReader, self.queue)) + self.pip_log_reader_thread = threading.Thread(target=_populateQueue, args=(self.pipeReader, self.queue)) self.pip_log_reader_thread.daemon = True self.start() self.pip_log_reader_thread.start() diff --git a/tools/nni_trial_tool/rest_utils.py b/tools/nni_trial_tool/rest_utils.py index 71eb353614..9f6227acbb 100644 --- a/tools/nni_trial_tool/rest_utils.py +++ b/tools/nni_trial_tool/rest_utils.py @@ -19,7 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import time import requests def rest_get(url, timeout): diff --git a/tools/nni_trial_tool/test/test_hdfsClientUtility.py b/tools/nni_trial_tool/test/test_hdfsClientUtility.py index 4a54a893c9..68ffe79d8f 100644 --- a/tools/nni_trial_tool/test/test_hdfsClientUtility.py +++ b/tools/nni_trial_tool/test/test_hdfsClientUtility.py @@ -18,16 +18,17 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import os +import shutil +import random +import string import unittest import json import sys from pyhdfs import HdfsClient +from tools.nni_trial_tool.hdfsClientUtility import copyFileToHdfs, copyDirectoryToHdfs sys.path.append("..") -from trial.hdfsClientUtility import copyFileToHdfs, copyDirectoryToHdfs -import os -import shutil -import random -import string + class HDFSClientUtilityTest(unittest.TestCase): '''Unit test for hdfsClientUtility.py''' @@ -82,7 +83,8 @@ def test_copy_directory_run(self): with open('./{0}/{1}'.format(directory_name, file_name), 'w') as file: file.write(file_content) - result = copyDirectoryToHdfs('./{}'.format(directory_name), '/{0}/{1}'.format(self.hdfs_config['userName'], directory_name), self.hdfs_client) + result = copyDirectoryToHdfs('./{}'.format(directory_name), + '/{0}/{1}'.format(self.hdfs_config['userName'], directory_name), self.hdfs_client) self.assertTrue(result) directory_list = self.hdfs_client.listdir('/{0}'.format(self.hdfs_config['userName'])) diff --git a/tools/nni_trial_tool/trial_keeper.py b/tools/nni_trial_tool/trial_keeper.py index 23d9c4f1ab..dc37ce9334 100644 --- a/tools/nni_trial_tool/trial_keeper.py +++ b/tools/nni_trial_tool/trial_keeper.py @@ -18,32 +18,31 @@ # ============================================================================================================================== # import argparse -import sys import os -from subprocess import Popen, PIPE +from subprocess import Popen import time import logging import shlex import re import sys -import select import json import threading from pyhdfs import HdfsClient import pkg_resources from .rest_utils import rest_post, rest_get -from .url_utils import gen_send_stdout_url, gen_send_version_url, gen_parameter_meta_url +from .url_utils import gen_send_version_url, gen_parameter_meta_url -from .constants import HOME_DIR, LOG_DIR, NNI_PLATFORM, STDOUT_FULL_PATH, STDERR_FULL_PATH, \ +from .constants import LOG_DIR, NNI_PLATFORM, STDOUT_FULL_PATH, STDERR_FULL_PATH, \ MULTI_PHASE, NNI_TRIAL_JOB_ID, NNI_SYS_DIR, NNI_EXP_ID from .hdfsClientUtility import copyDirectoryToHdfs, copyHdfsDirectoryToLocal, copyHdfsFileToLocal -from .log_utils import LogType, nni_log, RemoteLogger, PipeLogReader, StdOutputType +from .log_utils import LogType, nni_log, RemoteLogger, StdOutputType logger = logging.getLogger('trial_keeper') regular = re.compile('v?(?P[0-9](\.[0-9]){0,1}).*') _hdfs_client = None + def get_hdfs_client(args): global _hdfs_client @@ -62,15 +61,18 @@ def get_hdfs_client(args): if hdfs_host is not None and args.nni_hdfs_exp_dir is not None: try: if args.webhdfs_path: - _hdfs_client = HdfsClient(hosts='{0}:80'.format(hdfs_host), user_name=args.pai_user_name, webhdfs_path=args.webhdfs_path, timeout=5) + _hdfs_client = HdfsClient(hosts='{0}:80'.format(hdfs_host), user_name=args.pai_user_name, + webhdfs_path=args.webhdfs_path, timeout=5) else: # backward compatibility - _hdfs_client = HdfsClient(hosts='{0}:{1}'.format(hdfs_host, '50070'), user_name=args.pai_user_name, timeout=5) + _hdfs_client = HdfsClient(hosts='{0}:{1}'.format(hdfs_host, '50070'), user_name=args.pai_user_name, + timeout=5) except Exception as e: nni_log(LogType.Error, 'Create HDFS client error: ' + str(e)) raise e return _hdfs_client + def main_loop(args): '''main loop logic for trial keeper''' @@ -79,9 +81,11 @@ def main_loop(args): stdout_file = open(STDOUT_FULL_PATH, 'a+') stderr_file = open(STDERR_FULL_PATH, 'a+') - trial_keeper_syslogger = RemoteLogger(args.nnimanager_ip, args.nnimanager_port, 'trial_keeper', StdOutputType.Stdout, args.log_collection) + trial_keeper_syslogger = RemoteLogger(args.nnimanager_ip, args.nnimanager_port, 'trial_keeper', + StdOutputType.Stdout, args.log_collection) # redirect trial keeper's stdout and stderr to syslog - trial_syslogger_stdout = RemoteLogger(args.nnimanager_ip, args.nnimanager_port, 'trial', StdOutputType.Stdout, args.log_collection) + trial_syslogger_stdout = RemoteLogger(args.nnimanager_ip, args.nnimanager_port, 'trial', StdOutputType.Stdout, + args.log_collection) sys.stdout = sys.stderr = trial_keeper_syslogger hdfs_output_dir = None @@ -97,8 +101,10 @@ def main_loop(args): # Notice: We don't appoint env, which means subprocess wil inherit current environment and that is expected behavior log_pipe_stdout = trial_syslogger_stdout.get_pipelog_reader() - process = Popen(args.trial_command, shell = True, stdout = log_pipe_stdout, stderr = log_pipe_stdout) - nni_log(LogType.Info, 'Trial keeper spawns a subprocess (pid {0}) to run command: {1}'.format(process.pid, shlex.split(args.trial_command))) + process = Popen(args.trial_command, shell=True, stdout=log_pipe_stdout, stderr=log_pipe_stdout) + nni_log(LogType.Info, 'Trial keeper spawns a subprocess (pid {0}) to run command: {1}'.format(process.pid, + shlex.split( + args.trial_command))) while True: retCode = process.poll() @@ -110,9 +116,11 @@ def main_loop(args): nni_local_output_dir = os.environ['NNI_OUTPUT_DIR'] try: if copyDirectoryToHdfs(nni_local_output_dir, hdfs_output_dir, hdfs_client): - nni_log(LogType.Info, 'copy directory from {0} to {1} success!'.format(nni_local_output_dir, hdfs_output_dir)) + nni_log(LogType.Info, + 'copy directory from {0} to {1} success!'.format(nni_local_output_dir, hdfs_output_dir)) else: - nni_log(LogType.Info, 'copy directory from {0} to {1} failed!'.format(nni_local_output_dir, hdfs_output_dir)) + nni_log(LogType.Info, + 'copy directory from {0} to {1} failed!'.format(nni_local_output_dir, hdfs_output_dir)) except Exception as e: nni_log(LogType.Error, 'HDFS copy directory got exception: ' + str(e)) raise e @@ -123,14 +131,16 @@ def main_loop(args): time.sleep(2) + def trial_keeper_help_info(*args): print('please run --help to see guidance') + def check_version(args): try: trial_keeper_version = pkg_resources.get_distribution('nni').version except pkg_resources.ResolutionError as err: - #package nni does not exist, try nni-tool package + # package nni does not exist, try nni-tool package nni_log(LogType.Error, 'Package nni does not exist!') os._exit(1) if not args.nni_manager_version: @@ -145,21 +155,26 @@ def check_version(args): log_entry = {} if trial_keeper_version != nni_manager_version: nni_log(LogType.Error, 'Version does not match!') - error_message = 'NNIManager version is {0}, TrialKeeper version is {1}, NNI version does not match!'.format(nni_manager_version, trial_keeper_version) + error_message = 'NNIManager version is {0}, TrialKeeper version is {1}, NNI version does not match!'.format( + nni_manager_version, trial_keeper_version) log_entry['tag'] = 'VCFail' log_entry['msg'] = error_message - rest_post(gen_send_version_url(args.nnimanager_ip, args.nnimanager_port), json.dumps(log_entry), 10, False) + rest_post(gen_send_version_url(args.nnimanager_ip, args.nnimanager_port), json.dumps(log_entry), 10, + False) os._exit(1) else: nni_log(LogType.Info, 'Version match!') log_entry['tag'] = 'VCSuccess' - rest_post(gen_send_version_url(args.nnimanager_ip, args.nnimanager_port), json.dumps(log_entry), 10, False) + rest_post(gen_send_version_url(args.nnimanager_ip, args.nnimanager_port), json.dumps(log_entry), 10, + False) except AttributeError as err: nni_log(LogType.Error, err) + def is_multi_phase(): return MULTI_PHASE and (MULTI_PHASE in ['True', 'true']) + def download_parameter(meta_list, args): """ Download parameter file to local working directory. @@ -171,7 +186,8 @@ def download_parameter(meta_list, args): ] """ nni_log(LogType.Debug, str(meta_list)) - nni_log(LogType.Debug, 'NNI_SYS_DIR: {}, trial Id: {}, experiment ID: {}'.format(NNI_SYS_DIR, NNI_TRIAL_JOB_ID, NNI_EXP_ID)) + nni_log(LogType.Debug, + 'NNI_SYS_DIR: {}, trial Id: {}, experiment ID: {}'.format(NNI_SYS_DIR, NNI_TRIAL_JOB_ID, NNI_EXP_ID)) nni_log(LogType.Debug, 'NNI_SYS_DIR files: {}'.format(os.listdir(NNI_SYS_DIR))) for meta in meta_list: if meta['experimentId'] == NNI_EXP_ID and meta['trialId'] == NNI_TRIAL_JOB_ID: @@ -180,6 +196,7 @@ def download_parameter(meta_list, args): hdfs_client = get_hdfs_client(args) copyHdfsFileToLocal(meta['filePath'], param_fp, hdfs_client, override=False) + def fetch_parameter_file(args): class FetchThread(threading.Thread): def __init__(self, args): @@ -203,6 +220,7 @@ def run(self): fetch_file_thread = FetchThread(args) fetch_file_thread.start() + if __name__ == '__main__': '''NNI Trial Keeper main function''' PARSER = argparse.ArgumentParser() @@ -210,9 +228,9 @@ def run(self): PARSER.add_argument('--trial_command', type=str, help='Command to launch trial process') PARSER.add_argument('--nnimanager_ip', type=str, default='localhost', help='NNI manager rest server IP') PARSER.add_argument('--nnimanager_port', type=str, default='8081', help='NNI manager rest server port') - PARSER.add_argument('--pai_hdfs_output_dir', type=str, help='the output dir of pai_hdfs') # backward compatibility + PARSER.add_argument('--pai_hdfs_output_dir', type=str, help='the output dir of pai_hdfs') # backward compatibility PARSER.add_argument('--hdfs_output_dir', type=str, help='the output dir of hdfs') - PARSER.add_argument('--pai_hdfs_host', type=str, help='the host of pai_hdfs') # backward compatibility + PARSER.add_argument('--pai_hdfs_host', type=str, help='the host of pai_hdfs') # backward compatibility PARSER.add_argument('--hdfs_host', type=str, help='the host of hdfs') PARSER.add_argument('--pai_user_name', type=str, help='the username of hdfs') PARSER.add_argument('--nni_hdfs_exp_dir', type=str, help='nni experiment directory in hdfs') @@ -233,4 +251,3 @@ def run(self): except Exception as e: nni_log(LogType.Error, 'Exit trial keeper with code 1 because Exception: {} is catched'.format(str(e))) os._exit(1) - From 3d391610521d2029125e58084823335bdfe2d547 Mon Sep 17 00:00:00 2001 From: QuanluZhang Date: Thu, 31 Oct 2019 19:41:14 +0800 Subject: [PATCH 06/18] fix for pylint (#1656) --- src/sdk/pynni/nni/ppo_tuner/distri.py | 6 +++--- src/sdk/pynni/nni/ppo_tuner/policy.py | 2 +- src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py | 15 ++++++++------- src/sdk/pynni/nni/ppo_tuner/util.py | 2 +- src/sdk/pynni/nni/smac_tuner/smac_tuner.py | 1 - 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/sdk/pynni/nni/ppo_tuner/distri.py b/src/sdk/pynni/nni/ppo_tuner/distri.py index 4666acc2da..5f00843b3e 100644 --- a/src/sdk/pynni/nni/ppo_tuner/distri.py +++ b/src/sdk/pynni/nni/ppo_tuner/distri.py @@ -143,14 +143,14 @@ def sample(self): re_masked_res = tf.reshape(masked_res, [-1, self.size]) u = tf.random_uniform(tf.shape(re_masked_res), dtype=self.logits.dtype) - return tf.argmax(re_masked_res - tf.log(-tf.log(u)), axis=-1) + return tf.argmax(re_masked_res - tf.log(-1*tf.log(u)), axis=-1) else: u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype) - return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1) + return tf.argmax(self.logits - tf.log(-1*tf.log(u)), axis=-1) @classmethod def fromflat(cls, flat): - return cls(flat) + return cls(flat) # pylint: disable=no-value-for-parameter class CategoricalPdType(PdType): """ diff --git a/src/sdk/pynni/nni/ppo_tuner/policy.py b/src/sdk/pynni/nni/ppo_tuner/policy.py index 65e2db414e..980959a49e 100644 --- a/src/sdk/pynni/nni/ppo_tuner/policy.py +++ b/src/sdk/pynni/nni/ppo_tuner/policy.py @@ -107,7 +107,7 @@ def _build_model_for_step(self): def sample(logits, mask_npinf): new_logits = tf.math.add(logits, mask_npinf) u = tf.random_uniform(tf.shape(new_logits), dtype=logits.dtype) - return tf.argmax(new_logits - tf.log(-tf.log(u)), axis=-1) + return tf.argmax(new_logits - tf.log(-1*tf.log(u)), axis=-1) def neglogp(logits, x): # return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x) diff --git a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py index 1bc86ae750..4b5009d45d 100644 --- a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py +++ b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py @@ -22,11 +22,9 @@ class PPOTuner """ -import os import copy import logging import numpy as np -import json_tricks from gym import spaces import nni @@ -236,7 +234,8 @@ def compute_rewards(self, trials_info, trials_result): nextnonterminal = 1.0 - trials_info.dones[t+1] nextvalues = trials_info.values[t+1] delta = mb_rewards[t] + self.model_config.gamma * nextvalues * nextnonterminal - trials_info.values[t] - mb_advs[t] = lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam + lastgaelam = delta + self.model_config.gamma * self.model_config.lam * nextnonterminal * lastgaelam + mb_advs[t] = lastgaelam # pylint: disable=unsupported-assignment-operation mb_returns = mb_advs + trials_info.values trials_info.update_rewards(mb_rewards, mb_returns) @@ -536,8 +535,10 @@ def _next_round_inference(self): # generate new trials self.trials_result = [None for _ in range(self.inf_batch_size)] mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = self.model.inference(self.inf_batch_size) - self.trials_info = TrialsInfo(mb_obs, mb_actions, mb_values, mb_neglogpacs, - mb_dones, last_values, self.inf_batch_size) + self.trials_info = TrialsInfo(mb_obs, mb_actions, + mb_values, mb_neglogpacs, + mb_dones, last_values, + self.inf_batch_size) # check credit and submit new trials for _ in range(self.credit): trial_info_idx, actions = self.trials_info.get_next() @@ -581,8 +582,8 @@ def trial_end(self, parameter_id, success, **kwargs): assert trial_info_idx is not None # use mean of finished trials as the result of this failed trial values = [val for val in self.trials_result if val is not None] - logger.warning('zql values: {0}'.format(values)) - self.trials_result[trial_info_idx] = (sum(values) / len(values)) if len(values) > 0 else 0 + logger.warning('zql values: %s', values) + self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0 self.finished_trials += 1 if self.finished_trials == self.inf_batch_size: self._next_round_inference() diff --git a/src/sdk/pynni/nni/ppo_tuner/util.py b/src/sdk/pynni/nni/ppo_tuner/util.py index ac958e54de..acf704accc 100644 --- a/src/sdk/pynni/nni/ppo_tuner/util.py +++ b/src/sdk/pynni/nni/ppo_tuner/util.py @@ -56,7 +56,7 @@ def seq_to_batch(h, flat=False): def lstm(xs, ms, s, scope, nh, init_scale=1.0): """lstm cell""" - nbatch, nin = [v.value for v in xs[0].get_shape()] + _, nin = [v.value for v in xs[0].get_shape()] # the first is nbatch with tf.variable_scope(scope): wx = tf.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale)) wh = tf.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale)) diff --git a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py index fccf8c230e..4e2f876b9e 100644 --- a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py +++ b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py @@ -39,7 +39,6 @@ from .convert_ss_to_scenario import generate_scenario - class SMACTuner(Tuner): """ Parameters From cd71d355ec95ac0b72cebef2c78ad3ed3c97e5e1 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 1 Nov 2019 15:33:49 +0800 Subject: [PATCH 07/18] fix-pylint-dependencies (#1686) --- azure-pipelines.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 59b05edd3f..71db3d99f9 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -12,16 +12,16 @@ jobs: python3 -m pip install --upgrade pip setuptools --user python3 -m pip install pylint==2.3.1 --user displayName: 'Install python tools' - - script: | - python3 -m pylint --rcfile pylintrc tools - python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli - python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni - displayName: 'Run pylint' - script: | python3 -m pip install torch==0.4.1 --user python3 -m pip install torchvision==0.2.1 --user python3 -m pip install tensorflow==1.12.0 --user displayName: 'Install dependencies for integration' + - script: | + python3 -m pylint --rcfile pylintrc tools + python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli + python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni + displayName: 'Run pylint' - script: | source install.sh displayName: 'Install nni toolkit via source code' From 3214a7bf196f740fe24da4c9efe2eea788e1680c Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Fri, 1 Nov 2019 15:48:04 +0800 Subject: [PATCH 08/18] Dev pylint (#1688) * fix-pylint-dependencies * Fix pylint dependencies --- azure-pipelines.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 71db3d99f9..3900e550c1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -25,6 +25,11 @@ jobs: - script: | source install.sh displayName: 'Install nni toolkit via source code' + - script: | + python3 -m pylint --rcfile pylintrc tools + python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli + python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni + displayName: 'Run pylint' - script: | python3 -m pip install flake8 --user IGNORE=./tools/nni_annotation/testcase/*:F821,./examples/trials/mnist-nas/*/mnist*.py:F821,./examples/trials/nas_cifar10/src/cifar10/general_child.py:F821 From c07bf610510a5b10a1f77a1b6cb9135a148765a9 Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Fri, 1 Nov 2019 16:02:00 +0800 Subject: [PATCH 09/18] fix dependencies --- azure-pipelines.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3900e550c1..8d2c0319ff 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -17,11 +17,6 @@ jobs: python3 -m pip install torchvision==0.2.1 --user python3 -m pip install tensorflow==1.12.0 --user displayName: 'Install dependencies for integration' - - script: | - python3 -m pylint --rcfile pylintrc tools - python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli - python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni - displayName: 'Run pylint' - script: | source install.sh displayName: 'Install nni toolkit via source code' From 2d61ee4da6372874a8b71946f45e9d68bdae261b Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Fri, 1 Nov 2019 16:20:56 +0800 Subject: [PATCH 10/18] updates --- azure-pipelines.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8d2c0319ff..c1de78ebe6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -11,11 +11,16 @@ jobs: - script: | python3 -m pip install --upgrade pip setuptools --user python3 -m pip install pylint==2.3.1 --user + python3 -m pip install coverage --user displayName: 'Install python tools' - script: | python3 -m pip install torch==0.4.1 --user python3 -m pip install torchvision==0.2.1 --user - python3 -m pip install tensorflow==1.12.0 --user + python3 -m pip install tensorflow==1.13.1 --user + python3 -m pip install keras==2.1.6 --user + sudo apt-get install swig -y + PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC + PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB displayName: 'Install dependencies for integration' - script: | source install.sh From 3e2e490f5bfbf52fcc2da4e3ce99f76a29d51bce Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Fri, 1 Nov 2019 16:26:33 +0800 Subject: [PATCH 11/18] updates --- azure-pipelines.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c1de78ebe6..09df633da5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,6 +13,9 @@ jobs: python3 -m pip install pylint==2.3.1 --user python3 -m pip install coverage --user displayName: 'Install python tools' + - script: | + source install.sh + displayName: 'Install nni toolkit via source code' - script: | python3 -m pip install torch==0.4.1 --user python3 -m pip install torchvision==0.2.1 --user @@ -21,10 +24,7 @@ jobs: sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB - displayName: 'Install dependencies for integration' - - script: | - source install.sh - displayName: 'Install nni toolkit via source code' + displayName: 'Install dependencies' - script: | python3 -m pylint --rcfile pylintrc tools python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli From 7ae1e0c12a54b6dd2002db6a16fdaa19b87eea44 Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Fri, 1 Nov 2019 16:41:16 +0800 Subject: [PATCH 12/18] updates --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 09df633da5..73369d7470 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -21,6 +21,7 @@ jobs: python3 -m pip install torchvision==0.2.1 --user python3 -m pip install tensorflow==1.13.1 --user python3 -m pip install keras==2.1.6 --user + python3 -m pip install gym onnx --uses sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB From f9c2929e8a8d5cc5d63671a19ee89ee8c9d0a471 Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Fri, 1 Nov 2019 16:56:00 +0800 Subject: [PATCH 13/18] updates --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 73369d7470..bad452d054 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -21,7 +21,7 @@ jobs: python3 -m pip install torchvision==0.2.1 --user python3 -m pip install tensorflow==1.13.1 --user python3 -m pip install keras==2.1.6 --user - python3 -m pip install gym onnx --uses + python3 -m pip install gym onnx --user sudo apt-get install swig -y PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB From 747b17d59f2b857fc1b3bab79037223c58050ca1 Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Mon, 4 Nov 2019 10:28:16 +0800 Subject: [PATCH 14/18] exclude tools/nni_annotation --- azure-pipelines.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index bad452d054..4f9cd4a525 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -27,7 +27,9 @@ jobs: PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB displayName: 'Install dependencies' - script: | - python3 -m pylint --rcfile pylintrc tools + python3 -m pylint --rcfile pylintrc tools/nni_cmd + python3 -m pylint --rcfile pylintrc tools/nni_gpu_tool + python3 -m pylint --rcfile pylintrc tools/nni_trial_tool python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni displayName: 'Run pylint' From a1cc400bfcdfd4d735973bb5b44df8fa390ce7ed Mon Sep 17 00:00:00 2001 From: liuzhe-lz <40699903+liuzhe-lz@users.noreply.github.com> Date: Mon, 4 Nov 2019 11:22:17 +0800 Subject: [PATCH 15/18] let pylint detect packages instead of paths (#1696) --- azure-pipelines.yml | 11 ++++++----- src/sdk/pynni/nni/smac_tuner/__init__.py | 2 +- tools/nni_gpu_tool/gpu_metrics_collector.py | 4 ++-- tools/nni_trial_tool/log_utils.py | 10 +++++----- tools/nni_trial_tool/trial_keeper.py | 5 +---- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4f9cd4a525..0080e32c44 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -27,11 +27,12 @@ jobs: PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB displayName: 'Install dependencies' - script: | - python3 -m pylint --rcfile pylintrc tools/nni_cmd - python3 -m pylint --rcfile pylintrc tools/nni_gpu_tool - python3 -m pylint --rcfile pylintrc tools/nni_trial_tool - python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pycli/nnicli - python3 -m pylint --rcfile src/sdk/pynni/pylintrc src/sdk/pynni/nni + python3 -m pylint --rcfile pylintrc nni_annotation + python3 -m pylint --rcfile pylintrc nni_cmd + python3 -m pylint --rcfile pylintrc nni_gpu_tool + python3 -m pylint --rcfile pylintrc nni_trial_tool + python3 -m pylint --rcfile pylintrc nni + python3 -m pylint --rcfile pylintrc nnicli displayName: 'Run pylint' - script: | python3 -m pip install flake8 --user diff --git a/src/sdk/pynni/nni/smac_tuner/__init__.py b/src/sdk/pynni/nni/smac_tuner/__init__.py index 911fe59794..ca8bdc8b8e 100644 --- a/src/sdk/pynni/nni/smac_tuner/__init__.py +++ b/src/sdk/pynni/nni/smac_tuner/__init__.py @@ -1 +1 @@ -from .smac_tuner import SMACTuner \ No newline at end of file +from .smac_tuner import SMACTuner diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py index 7b7e9fca5b..436e1edaaf 100644 --- a/tools/nni_gpu_tool/gpu_metrics_collector.py +++ b/tools/nni_gpu_tool/gpu_metrics_collector.py @@ -33,14 +33,14 @@ def check_ready_to_run(): pidList.pop(0) # remove the key word 'ProcessId' pidList = list(map(int, pidList)) pidList.remove(os.getpid()) - return len(pidList) == 0 + return not pidList else: pgrep_output = subprocess.check_output('pgrep -fx \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True) pidList = [] for pid in pgrep_output.splitlines(): pidList.append(int(pid)) pidList.remove(os.getpid()) - return len(pidList) == 0 + return not pidList def main(argv): metrics_output_dir = os.environ['METRIC_OUTPUT_DIR'] diff --git a/tools/nni_trial_tool/log_utils.py b/tools/nni_trial_tool/log_utils.py index 8b07754ff7..8b7c5b3dd5 100644 --- a/tools/nni_trial_tool/log_utils.py +++ b/tools/nni_trial_tool/log_utils.py @@ -72,7 +72,7 @@ def emit(self, record): log_entry['msg'] = self.format(record) try: - response = rest_post(gen_send_stdout_url(self.host, self.port), json.dumps(log_entry), 10, True) + rest_post(gen_send_stdout_url(self.host, self.port), json.dumps(log_entry), 10, True) except Exception as e: self.orig_stderr.write(str(e) + '\n') self.orig_stderr.flush() @@ -111,7 +111,7 @@ def write(self, buf): self.orig_stdout.flush() try: self.logger.log(self.log_level, line.rstrip()) - except Exception as e: + except Exception: pass class PipeLogReader(threading.Thread): @@ -146,9 +146,9 @@ def _populateQueue(stream, queue): line = self.queue.get(True, 5) try: self.logger.log(self.log_level, line.rstrip()) - except Exception as e: + except Exception: pass - except Exception as e: + except Exception: if cur_process_exit == True: self._is_read_completed = True break @@ -194,4 +194,4 @@ def is_read_completed(self): def set_process_exit(self): self.process_exit = True - return self.process_exit \ No newline at end of file + return self.process_exit diff --git a/tools/nni_trial_tool/trial_keeper.py b/tools/nni_trial_tool/trial_keeper.py index dc37ce9334..2ce89cc192 100644 --- a/tools/nni_trial_tool/trial_keeper.py +++ b/tools/nni_trial_tool/trial_keeper.py @@ -32,8 +32,7 @@ from .rest_utils import rest_post, rest_get from .url_utils import gen_send_version_url, gen_parameter_meta_url -from .constants import LOG_DIR, NNI_PLATFORM, STDOUT_FULL_PATH, STDERR_FULL_PATH, \ - MULTI_PHASE, NNI_TRIAL_JOB_ID, NNI_SYS_DIR, NNI_EXP_ID +from .constants import LOG_DIR, NNI_PLATFORM, MULTI_PHASE, NNI_TRIAL_JOB_ID, NNI_SYS_DIR, NNI_EXP_ID from .hdfsClientUtility import copyDirectoryToHdfs, copyHdfsDirectoryToLocal, copyHdfsFileToLocal from .log_utils import LogType, nni_log, RemoteLogger, StdOutputType @@ -79,8 +78,6 @@ def main_loop(args): if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR) - stdout_file = open(STDOUT_FULL_PATH, 'a+') - stderr_file = open(STDERR_FULL_PATH, 'a+') trial_keeper_syslogger = RemoteLogger(args.nnimanager_ip, args.nnimanager_port, 'trial_keeper', StdOutputType.Stdout, args.log_collection) # redirect trial keeper's stdout and stderr to syslog From a6aaed265ecc41e1c42480009cc8f036d4f08610 Mon Sep 17 00:00:00 2001 From: liuzhe Date: Mon, 4 Nov 2019 11:54:01 +0800 Subject: [PATCH 16/18] downgrade astroid and add set -e --- azure-pipelines.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0080e32c44..c1f280a98b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -10,7 +10,7 @@ jobs: steps: - script: | python3 -m pip install --upgrade pip setuptools --user - python3 -m pip install pylint==2.3.1 --user + python3 -m pip install pylint==2.3.1 astroid=2.2.5 --user python3 -m pip install coverage --user displayName: 'Install python tools' - script: | @@ -27,6 +27,7 @@ jobs: PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB displayName: 'Install dependencies' - script: | + set -e python3 -m pylint --rcfile pylintrc nni_annotation python3 -m pylint --rcfile pylintrc nni_cmd python3 -m pylint --rcfile pylintrc nni_gpu_tool From 647d96978efb929e3a56d5698d9e5012af937daa Mon Sep 17 00:00:00 2001 From: liuzhe Date: Mon, 4 Nov 2019 12:10:57 +0800 Subject: [PATCH 17/18] bugfix --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c1f280a98b..0925962595 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -10,7 +10,7 @@ jobs: steps: - script: | python3 -m pip install --upgrade pip setuptools --user - python3 -m pip install pylint==2.3.1 astroid=2.2.5 --user + python3 -m pip install pylint==2.3.1 astroid==2.2.5 --user python3 -m pip install coverage --user displayName: 'Install python tools' - script: | From 1f1b399e5e73b81c756134e94c298f1b749eb07c Mon Sep 17 00:00:00 2001 From: Chengmin Chi Date: Mon, 4 Nov 2019 13:38:41 +0800 Subject: [PATCH 18/18] fix pylint --- src/sdk/pynni/nni/compression/tensorflow/compressor.py | 2 +- src/sdk/pynni/nni/compression/torch/compressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sdk/pynni/nni/compression/tensorflow/compressor.py b/src/sdk/pynni/nni/compression/tensorflow/compressor.py index c46e883342..6382c25a8a 100644 --- a/src/sdk/pynni/nni/compression/tensorflow/compressor.py +++ b/src/sdk/pynni/nni/compression/tensorflow/compressor.py @@ -80,7 +80,7 @@ def select_config(self, layer): Returns ------- ret : config or None - the retrieved configuration for this layer, if None, this layer should + the retrieved configuration for this layer, if None, this layer should not be compressed """ ret = None diff --git a/src/sdk/pynni/nni/compression/torch/compressor.py b/src/sdk/pynni/nni/compression/torch/compressor.py index 580b1c1fac..bb9e76e0da 100644 --- a/src/sdk/pynni/nni/compression/torch/compressor.py +++ b/src/sdk/pynni/nni/compression/torch/compressor.py @@ -73,7 +73,7 @@ def select_config(self, layer): Returns ------- ret : config or None - the retrieved configuration for this layer, if None, this layer should + the retrieved configuration for this layer, if None, this layer should not be compressed """ ret = None