From bf23601287561b51b66b8b8a317827572ccecccf Mon Sep 17 00:00:00 2001
From: gxiaotian
Date: Mon, 11 Nov 2019 10:09:18 +0800
Subject: [PATCH 01/14] Add Docs for Tuning Systems with NNI (#1715)

* Add Docs for Tuning Systems with NNI

* Updates based on comments

* updates

* updates

* updates
---
 README_zh_CN.md                               |  2 +-
 docs/en_US/CommunitySharings/TuningSystems.md | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 docs/en_US/CommunitySharings/TuningSystems.md

diff --git a/README_zh_CN.md b/README_zh_CN.md
index 9af25db8ea..a0cafb5a72 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -359,4 +359,4 @@ You can use these commands to get more information about the experiment

 ## **许可协议**

-代码库遵循 [MIT 许可协议](LICENSE)
\ No newline at end of file
+代码库遵循 [MIT 许可协议](LICENSE)
diff --git a/docs/en_US/CommunitySharings/TuningSystems.md b/docs/en_US/CommunitySharings/TuningSystems.md
new file mode 100644
index 0000000000..30c44c7a8a
--- /dev/null
+++ b/docs/en_US/CommunitySharings/TuningSystems.md
@@ -0,0 +1,10 @@
+# Automatically tune systems with NNI
+
+As computer systems and networks grow increasingly complicated, optimizing them manually with explicit rules and heuristics becomes harder than ever, and sometimes impossible. Below are two examples of tuning systems with NNI; by following them, you can easily tune your own systems.
+
+* [Tuning RocksDB with NNI](../TrialExample/RocksdbExamples.md)
+* [Tuning parameters of SPTAG (Space Partition Tree And Graph) with NNI](SptagAutoTune.md)
+
+Please see [this paper](https://dl.acm.org/citation.cfm?id=3352031) for more details:
+
+Liang, Chieh-Jan Mike, et al. "The Case for Learning-and-System Co-design." ACM SIGOPS Operating Systems Review 53.1 (2019): 68-74.
From 803f056a307366dbef613defae9000131c5391c7 Mon Sep 17 00:00:00 2001
From: Lijiao <35484733+lvybriage@users.noreply.github.com>
Date: Mon, 11 Nov 2019 10:40:58 +0800
Subject: [PATCH 02/14] maintain selected status (#1710)

---
 src/webui/src/App.tsx                 | 10 +++++++++-
 src/webui/src/components/Overview.tsx | 18 ++++++++++--------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/webui/src/App.tsx b/src/webui/src/App.tsx
index 3015a7e4ce..21ddfbd385 100644
--- a/src/webui/src/App.tsx
+++ b/src/webui/src/App.tsx
@@ -10,6 +10,7 @@ interface AppState {
     columnList: Array<string>;
     experimentUpdateBroadcast: number;
     trialsUpdateBroadcast: number;
+    metricGraphMode: 'max' | 'min'; // tuner's optimize_mode field
 }

 class App extends React.Component<{}, AppState> {
@@ -22,6 +23,7 @@ class App extends React.Component<{}, AppState> {
             columnList: COLUMN,
             experimentUpdateBroadcast: 0,
             trialsUpdateBroadcast: 0,
+            metricGraphMode: 'max'
         };
     }

@@ -30,6 +32,7 @@ class App extends React.Component<{}, AppState> {
         this.setState(state => ({ experimentUpdateBroadcast: state.experimentUpdateBroadcast + 1 }));
         this.setState(state => ({ trialsUpdateBroadcast: state.trialsUpdateBroadcast + 1 }));
         this.timerId = window.setTimeout(this.refresh, this.state.interval * 1000);
+        this.setState({ metricGraphMode: (EXPERIMENT.optimizeMode === 'minimize' ?
'min' : 'max') }); } changeInterval = (interval: number) => { @@ -46,8 +49,12 @@ class App extends React.Component<{}, AppState> { this.setState({ columnList: columnList }); } + changeMetricGraphMode = (val: 'max' | 'min') => { + this.setState({ metricGraphMode: val }); + } + render() { - const { interval, columnList, experimentUpdateBroadcast, trialsUpdateBroadcast } = this.state; + const { interval, columnList, experimentUpdateBroadcast, trialsUpdateBroadcast, metricGraphMode } = this.state; if (experimentUpdateBroadcast === 0 || trialsUpdateBroadcast === 0) { return null; // TODO: render a loading page } @@ -59,6 +66,7 @@ class App extends React.Component<{}, AppState> { columnList, changeColumn: this.changeColumn, experimentUpdateBroadcast, trialsUpdateBroadcast, + metricGraphMode, changeMetricGraphMode: this.changeMetricGraphMode }) ); return ( diff --git a/src/webui/src/components/Overview.tsx b/src/webui/src/components/Overview.tsx index 22d52e5458..c6a3af449c 100644 --- a/src/webui/src/components/Overview.tsx +++ b/src/webui/src/components/Overview.tsx @@ -19,31 +19,33 @@ require('../static/style/overviewTitle.scss'); interface OverviewProps { experimentUpdateBroadcast: number; trialsUpdateBroadcast: number; + metricGraphMode: 'max' | 'min'; + changeMetricGraphMode: (val: 'max' | 'min') => void; } interface OverviewState { trialConcurrency: number; - metricGraphMode: 'max' | 'min'; } class Overview extends React.Component { constructor(props: OverviewProps) { super(props); this.state = { - trialConcurrency: EXPERIMENT.trialConcurrency, - metricGraphMode: (EXPERIMENT.optimizeMode === 'minimize' ? 'min' : 'max'), + trialConcurrency: EXPERIMENT.trialConcurrency }; } clickMaxTop = (event: React.SyntheticEvent) => { event.stopPropagation(); // #999 panel active bgcolor; #b3b3b3 as usual - this.setState({ metricGraphMode: 'max' }); + const { changeMetricGraphMode } = this.props; + changeMetricGraphMode('max'); } clickMinTop = (event: React.SyntheticEvent) => { event.stopPropagation(); - this.setState({ metricGraphMode: 'min' }); + const { changeMetricGraphMode } = this.props; + changeMetricGraphMode('min'); } changeConcurrency = (val: number) => { @@ -51,8 +53,8 @@ class Overview extends React.Component { } render() { - const { trialConcurrency, metricGraphMode } = this.state; - const { experimentUpdateBroadcast } = this.props; + const { trialConcurrency } = this.state; + const { experimentUpdateBroadcast, metricGraphMode } = this.props; const searchSpace = this.convertSearchSpace(); @@ -160,7 +162,7 @@ class Overview extends React.Component { private findBestTrials(): Trial[] { let bestTrials = TRIALS.sort(); - if (this.state.metricGraphMode === 'max') { + if (this.props.metricGraphMode === 'max') { bestTrials.reverse().splice(10); } else { bestTrials.splice(10); From 0168ff1c66026995b938eea93a24f94a0012c9be Mon Sep 17 00:00:00 2001 From: xuehui Date: Mon, 11 Nov 2019 10:45:27 +0800 Subject: [PATCH 03/14] update docstring and pylint (#1662) * update docstring of batchtuner * update docstring of batch tuner * update docstring of evolution tuner * update docstring and pylint of metis_tuner * fix pylint related to logger in metis_tuner * fix pylint * update * fix pylint in metis_tuner * update in networkmorphsim_tuner * update * update * update docstring in hyperopt_tuner * update batch_tuner * delete unused space * update in metis * update sdk_reference.rst * update netowrkmorhism * update networkmorphsim * update batch_tuner * update batch_tuner * update * update metis * roll back to 
print * update Returns * update * delete white space --- docs/en_US/sdk_reference.rst | 3 + src/sdk/pynni/nni/batch_tuner/batch_tuner.py | 63 ++-- .../nni/evolution_tuner/evolution_tuner.py | 52 ++- .../nni/hyperopt_tuner/hyperopt_tuner.py | 6 +- .../metis_tuner/Regression_GMM/CreateModel.py | 16 +- .../metis_tuner/Regression_GMM/Selection.py | 21 +- .../Regression_GP/OutlierDetection.py | 45 +-- .../metis_tuner/lib_acquisition_function.py | 50 ++- .../metis_tuner/lib_constraint_summation.py | 37 +- src/sdk/pynni/nni/metis_tuner/lib_data.py | 5 +- src/sdk/pynni/nni/metis_tuner/metis_tuner.py | 337 ++++++++++++------ .../nni/networkmorphism_tuner/bayesian.py | 26 +- .../pynni/nni/networkmorphism_tuner/graph.py | 65 ++-- .../graph_transformer.py | 20 +- .../layer_transformer.py | 24 +- .../pynni/nni/networkmorphism_tuner/layers.py | 326 +++++++++++------ .../networkmorphism_tuner.py | 117 +++--- src/sdk/pynni/nni/networkmorphism_tuner/nn.py | 33 +- .../test_networkmorphism_tuner.py | 17 +- 19 files changed, 842 insertions(+), 421 deletions(-) diff --git a/docs/en_US/sdk_reference.rst b/docs/en_US/sdk_reference.rst index 0eccbc1b5c..7bf274996d 100644 --- a/docs/en_US/sdk_reference.rst +++ b/docs/en_US/sdk_reference.rst @@ -36,6 +36,9 @@ Tuner .. autoclass:: nni.metis_tuner.metis_tuner.MetisTuner :members: +.. autoclass:: nni.batch_tuner.batch_tuner.BatchTuner + :members: + Assessor ------------------------ .. autoclass:: nni.assessor.Assessor diff --git a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py index 64012444ac..c223d93552 100644 --- a/src/sdk/pynni/nni/batch_tuner/batch_tuner.py +++ b/src/sdk/pynni/nni/batch_tuner/batch_tuner.py @@ -31,22 +31,27 @@ class BatchTuner CHOICE = 'choice' VALUE = '_value' -logger = logging.getLogger('batch_tuner_AutoML') +LOGGER = logging.getLogger('batch_tuner_AutoML') class BatchTuner(Tuner): """ BatchTuner is tuner will running all the configure that user want to run batchly. + + Examples + -------- The search space only be accepted like: + ``` { 'combine_params': { '_type': 'choice', '_value': '[{...}, {...}, {...}]', } } + ``` """ def __init__(self): - self.count = -1 - self.values = [] + self._count = -1 + self._values = [] def is_valid(self, search_space): """ @@ -55,6 +60,11 @@ def is_valid(self, search_space): Parameters ---------- search_space : dict + + Returns + ------- + None or list + If valid, return candidate values; else return None. 
""" if not len(search_space) == 1: raise RuntimeError('BatchTuner only supprt one combined-paramreters key.') @@ -62,11 +72,14 @@ def is_valid(self, search_space): for param in search_space: param_type = search_space[param][TYPE] if not param_type == CHOICE: - raise RuntimeError('BatchTuner only supprt one combined-paramreters type is choice.') - else: - if isinstance(search_space[param][VALUE], list): - return search_space[param][VALUE] - raise RuntimeError('The combined-paramreters value in BatchTuner is not a list.') + raise RuntimeError('BatchTuner only supprt \ + one combined-paramreters type is choice.') + + if isinstance(search_space[param][VALUE], list): + return search_space[param][VALUE] + + raise RuntimeError('The combined-paramreters \ + value in BatchTuner is not a list.') return None def update_search_space(self, search_space): @@ -76,7 +89,7 @@ def update_search_space(self, search_space): ---------- search_space : dict """ - self.values = self.is_valid(search_space) + self._values = self.is_valid(search_space) def generate_parameters(self, parameter_id, **kwargs): """Returns a dict of trial (hyper-)parameters, as a serializable object. @@ -84,41 +97,49 @@ def generate_parameters(self, parameter_id, **kwargs): Parameters ---------- parameter_id : int + + Returns + ------- + dict + A candidate parameter group. """ - self.count += 1 - if self.count > len(self.values) - 1: + self._count += 1 + if self._count > len(self._values) - 1: raise nni.NoMoreTrialError('no more parameters now.') - return self.values[self.count] + return self._values[self._count] def receive_trial_result(self, parameter_id, parameters, value, **kwargs): pass def import_data(self, data): """Import additional data for tuning + Parameters ---------- data: a list of dictionarys, each of which has at least two keys, 'parameter' and 'value' """ - if not self.values: - logger.info("Search space has not been initialized, skip this data import") + if not self._values: + LOGGER.info("Search space has not been initialized, skip this data import") return - self.values = self.values[(self.count+1):] - self.count = -1 + self._values = self._values[(self._count+1):] + self._count = -1 _completed_num = 0 for trial_info in data: - logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data)) + LOGGER .info("Importing data, current processing \ + progress %s / %s", _completed_num, len(data)) # simply validate data format assert "parameter" in trial_info _params = trial_info["parameter"] assert "value" in trial_info _value = trial_info['value'] if not _value: - logger.info("Useless trial data, value is %s, skip this trial data.", _value) + LOGGER.info("Useless trial data, value is %s, skip this trial data.", _value) continue _completed_num += 1 - if _params in self.values: - self.values.remove(_params) - logger.info("Successfully import data to batch tuner, total data: %d, imported data: %d.", len(data), _completed_num) + if _params in self._values: + self._values.remove(_params) + LOGGER .info("Successfully import data to batch tuner, \ + total data: %d, imported data: %d.", len(data), _completed_num) diff --git a/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py b/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py index 8cec6df1ce..3b12ab7505 100644 --- a/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py +++ b/src/sdk/pynni/nni/evolution_tuner/evolution_tuner.py @@ -32,7 +32,9 @@ def json2space(x, oldy=None, name=NodeType.ROOT): - """Change search space from json format to hyperopt 
format + """ + Change search space from json format to hyperopt format + """ y = list() if isinstance(x, dict): @@ -59,7 +61,9 @@ def json2space(x, oldy=None, name=NodeType.ROOT): return y def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeType.ROOT): - """Json to pramaters. + """ + Json to pramaters. + """ if isinstance(x, dict): if NodeType.TYPE in x.keys(): @@ -117,6 +121,17 @@ def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeTyp class Individual: """ Indicidual class to store the indv info. + + Attributes + ---------- + config : str + Search space. + info : str + The str to save information of individual. + result : float + The final metric of a individual. + store_dir : str + save_dir : str """ def __init__(self, config=None, info=None, result=None, save_dir=None): @@ -124,6 +139,7 @@ def __init__(self, config=None, info=None, result=None, save_dir=None): Parameters ---------- config : str + A config to represent a group of parameters. info : str result : float save_dir : str @@ -140,6 +156,8 @@ def __str__(self): def mutation(self, config=None, info=None, save_dir=None): """ + Mutation by reset state information. + Parameters ---------- config : str @@ -177,8 +195,11 @@ def __init__(self, optimize_mode="maximize", population_size=32): self.population = None self.space = None + def update_search_space(self, search_space): - """Update search space. + """ + Update search space. + Search_space contains the information that user pre-defined. Parameters @@ -191,15 +212,19 @@ def update_search_space(self, search_space): self.random_state = np.random.RandomState() self.population = [] is_rand = dict() + for item in self.space: is_rand[item] = True + for _ in range(self.population_size): config = json2parameter( self.searchspace_json, is_rand, self.random_state) self.population.append(Individual(config=config)) + def generate_parameters(self, parameter_id, **kwargs): - """Returns a dict of trial (hyper-)parameters, as a serializable object. + """ + This function will returns a dict of trial (hyper-)parameters, as a serializable object. Parameters ---------- @@ -207,15 +232,19 @@ def generate_parameters(self, parameter_id, **kwargs): Returns ------- - config : dict + dict + A group of candaidte parameters that evolution tuner generated. """ if not self.population: raise RuntimeError('The population is empty') + pos = -1 + for i in range(len(self.population)): if self.population[i].result is None: pos = i break + if pos != -1: indiv = copy.deepcopy(self.population[pos]) self.population.pop(pos) @@ -230,6 +259,7 @@ def generate_parameters(self, parameter_id, **kwargs): self.population[0].config) is_rand = dict() mutation_pos = space[random.randint(0, len(space)-1)] + for i in range(len(self.space)): is_rand[self.space[i]] = (self.space[i] == mutation_pos) config = json2parameter( @@ -238,21 +268,27 @@ def generate_parameters(self, parameter_id, **kwargs): # remove "_index" from config and save params-id total_config = config + self.total_data[parameter_id] = total_config config = split_index(total_config) + return config + def receive_trial_result(self, parameter_id, parameters, value, **kwargs): - '''Record the result from a trial + """ + Record the result from a trial Parameters ---------- - parameters: dict + parameter_id : int + parameters : dict value : dict/float if value is dict, it should have "default" key. value is final metrics of the trial. 
- ''' + """ reward = extract_scalar_reward(value) + if parameter_id not in self.total_data: raise RuntimeError('Received parameter_id not in total_data.') # restore the paramsters contains "_index" diff --git a/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py b/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py index 89c8d662c3..0e250fa8dd 100644 --- a/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py +++ b/src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py @@ -422,7 +422,8 @@ def miscs_update_idxs_vals(self, misc_by_id[tid]['vals'][key] = [val] def get_suggestion(self, random_search=False): - """get suggestion from hyperopt + """ + get suggestion from hyperopt Parameters ---------- @@ -473,7 +474,8 @@ def get_suggestion(self, random_search=False): return total_params def import_data(self, data): - """Import additional data for tuning + """ + Import additional data for tuning Parameters ---------- diff --git a/src/sdk/pynni/nni/metis_tuner/Regression_GMM/CreateModel.py b/src/sdk/pynni/nni/metis_tuner/Regression_GMM/CreateModel.py index 3ed39e0cf8..7bc9e070fb 100644 --- a/src/sdk/pynni/nni/metis_tuner/Regression_GMM/CreateModel.py +++ b/src/sdk/pynni/nni/metis_tuner/Regression_GMM/CreateModel.py @@ -16,7 +16,8 @@ # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. import os import sys @@ -31,7 +32,8 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34): ''' Create the Gaussian Mixture Model ''' - samples = [samples_x[i] + [samples_y_aggregation[i]] for i in range(0, len(samples_x))] + samples = [samples_x[i] + [samples_y_aggregation[i]] + for i in range(0, len(samples_x))] # Sorts so that we can get the top samples samples = sorted(samples, key=itemgetter(-1)) @@ -39,13 +41,16 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34): samples_goodbatch = samples[0:samples_goodbatch_size] samples_badbatch = samples[samples_goodbatch_size:] - samples_x_goodbatch = [sample_goodbatch[0:-1] for sample_goodbatch in samples_goodbatch] + samples_x_goodbatch = [sample_goodbatch[0:-1] + for sample_goodbatch in samples_goodbatch] #samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch] - samples_x_badbatch = [sample_badbatch[0:-1] for sample_badbatch in samples_badbatch] + samples_x_badbatch = [sample_badbatch[0:-1] + for sample_badbatch in samples_badbatch] # === Trains GMM clustering models === # #sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__))) - bgmm_goodbatch = mm.BayesianGaussianMixture(n_components=max(1, samples_goodbatch_size - 1)) + bgmm_goodbatch = mm.BayesianGaussianMixture( + n_components=max(1, samples_goodbatch_size - 1)) bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1) bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components) bgmm_goodbatch.fit(samples_x_goodbatch) @@ -55,4 +60,3 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34): model['clusteringmodel_good'] = bgmm_goodbatch model['clusteringmodel_bad'] = bgmm_badbatch return model - \ No newline at end of file diff --git 
a/src/sdk/pynni/nni/metis_tuner/Regression_GMM/Selection.py b/src/sdk/pynni/nni/metis_tuner/Regression_GMM/Selection.py index eba35ae09d..758383f92f 100644 --- a/src/sdk/pynni/nni/metis_tuner/Regression_GMM/Selection.py +++ b/src/sdk/pynni/nni/metis_tuner/Regression_GMM/Selection.py @@ -16,7 +16,8 @@ # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. import os import random @@ -33,14 +34,17 @@ CONSTRAINT_PARAMS_IDX = [] -def _ratio_scores(parameters_value, clusteringmodel_gmm_good, clusteringmodel_gmm_bad): +def _ratio_scores(parameters_value, clusteringmodel_gmm_good, + clusteringmodel_gmm_bad): ''' The ratio is smaller the better ''' - ratio = clusteringmodel_gmm_good.score([parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value]) + ratio = clusteringmodel_gmm_good.score( + [parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value]) sigma = 0 return ratio, sigma + def selection_r(x_bounds, x_types, clusteringmodel_gmm_good, @@ -60,6 +64,7 @@ def selection_r(x_bounds, return outputs + def selection(x_bounds, x_types, clusteringmodel_gmm_good, @@ -69,13 +74,14 @@ def selection(x_bounds, ''' Select the lowest mu value ''' - results = lib_acquisition_function.next_hyperparameter_lowest_mu(\ - _ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],\ - x_bounds, x_types, minimize_starting_points, \ - minimize_constraints_fun=minimize_constraints_fun) + results = lib_acquisition_function.next_hyperparameter_lowest_mu( + _ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad], + x_bounds, x_types, minimize_starting_points, + minimize_constraints_fun=minimize_constraints_fun) return results + def _rand_with_constraints(x_bounds, x_types): ''' Random generate the variable with constraints @@ -96,6 +102,7 @@ def _rand_with_constraints(x_bounds, x_types): outputs[i] = random.randint(x_bounds[i][0], x_bounds[i][1]) return outputs + def _minimize_constraints_fun_summation(x): ''' Minimize constraints fun summation diff --git a/src/sdk/pynni/nni/metis_tuner/Regression_GP/OutlierDetection.py b/src/sdk/pynni/nni/metis_tuner/Regression_GP/OutlierDetection.py index 7010815b23..24b2e03027 100644 --- a/src/sdk/pynni/nni/metis_tuner/Regression_GP/OutlierDetection.py +++ b/src/sdk/pynni/nni/metis_tuner/Regression_GP/OutlierDetection.py @@ -17,7 +17,9 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- +""" +OutlierDectection.py +""" import os import sys @@ -30,19 +32,21 @@ def _outlierDetection_threaded(inputs): - ''' + """ Detect the outlier - ''' + """ [samples_idx, samples_x, samples_y_aggregation] = inputs - sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"\ - % (os.path.basename(__file__), samples_idx + 1, len(samples_x))) + sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n" + % (os.path.basename(__file__), samples_idx + 1, len(samples_x))) outlier = None - # Create a diagnostic regression model which removes the sample that we want to evaluate - diagnostic_regressor_gp = gp_create_model.create_model(\ - samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\ - samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:]) - mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model']) + # Create a diagnostic regression model which removes the sample that we + # want to evaluate + diagnostic_regressor_gp = gp_create_model.create_model( + samples_x[0:samples_idx] + samples_x[samples_idx + 1:], + samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:]) + mu, sigma = gp_prediction.predict( + samples_x[samples_idx], diagnostic_regressor_gp['model']) # 2.33 is the z-score for 98% confidence level if abs(samples_y_aggregation[samples_idx] - mu) > (2.33 * sigma): @@ -52,16 +56,18 @@ def _outlierDetection_threaded(inputs): "difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)} return outlier + def outlierDetection_threaded(samples_x, samples_y_aggregation): - ''' + """ Use Multi-thread to detect the outlier - ''' + """ outliers = [] - threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]\ - for samples_idx in range(0, len(samples_x))] + threads_inputs = [[samples_idx, samples_x, samples_y_aggregation] + for samples_idx in range(0, len(samples_x))] threads_pool = ThreadPool(min(4, len(threads_inputs))) - threads_results = threads_pool.map(_outlierDetection_threaded, threads_inputs) + threads_results = threads_pool.map( + _outlierDetection_threaded, threads_inputs) threads_pool.close() threads_pool.join() @@ -69,15 +75,13 @@ def outlierDetection_threaded(samples_x, samples_y_aggregation): if threads_result is not None: outliers.append(threads_result) else: - print("error here.") + print("Error: threads_result is None.") outliers = outliers if outliers else None return outliers + def outlierDetection(samples_x, samples_y_aggregation): - ''' - TODO - ''' outliers = [] for samples_idx, _ in enumerate(samples_x): #sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n" @@ -92,7 +96,8 @@ def outlierDetection(samples_x, samples_y_aggregation): outliers.append({"samples_idx": samples_idx, "expected_mu": mu, "expected_sigma": sigma, - "difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}) + "difference": \ + abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}) outliers = outliers if outliers else None return outliers diff --git a/src/sdk/pynni/nni/metis_tuner/lib_acquisition_function.py b/src/sdk/pynni/nni/metis_tuner/lib_acquisition_function.py index 8beff1a6e6..476323c93f 100644 --- a/src/sdk/pynni/nni/metis_tuner/lib_acquisition_function.py +++ b/src/sdk/pynni/nni/metis_tuner/lib_acquisition_function.py @@ -16,7 +16,11 @@ # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +""" +lib_acquisition_function.py +""" import sys import numpy @@ -33,9 +37,9 @@ def next_hyperparameter_expected_improvement(fun_prediction, samples_y_aggregation, minimize_starting_points, minimize_constraints_fun=None): - ''' + """ "Expected Improvement" acquisition function - ''' + """ best_x = None best_acquisition_value = None x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds] @@ -70,6 +74,7 @@ def next_hyperparameter_expected_improvement(fun_prediction, return outputs + def _expected_improvement(x, fun_prediction, fun_prediction_args, x_bounds, x_types, samples_y_aggregation, minimize_constraints_fun): @@ -77,7 +82,8 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args, x = lib_data.match_val_type(x, x_bounds, x_types) expected_improvement = sys.maxsize - if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True): + if (minimize_constraints_fun is None) or ( + minimize_constraints_fun(x) is True): mu, sigma = fun_prediction(x, *fun_prediction_args) loss_optimum = min(samples_y_aggregation) @@ -87,7 +93,7 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args, with numpy.errstate(divide="ignore"): Z = scaling_factor * (mu - loss_optimum) / sigma expected_improvement = scaling_factor * (mu - loss_optimum) * \ - norm.cdf(Z) + sigma * norm.pdf(Z) + norm.cdf(Z) + sigma * norm.pdf(Z) expected_improvement = 0.0 if sigma == 0.0 else expected_improvement # We want expected_improvement to be as large as possible @@ -101,9 +107,9 @@ def next_hyperparameter_lowest_confidence(fun_prediction, x_bounds, x_types, minimize_starting_points, minimize_constraints_fun=None): - ''' + """ "Lowest Confidence" acquisition function - ''' + """ best_x = None best_acquisition_value = None x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds] @@ -120,10 +126,12 @@ def next_hyperparameter_lowest_confidence(fun_prediction, x_types, minimize_constraints_fun)) - if (best_acquisition_value) is None or (res.fun < best_acquisition_value): + if (best_acquisition_value) is None or ( + res.fun < best_acquisition_value): res.x = numpy.ndarray.tolist(res.x) res.x = lib_data.match_val_type(res.x, x_bounds, x_types) - if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True): + if (minimize_constraints_fun is None) or ( + minimize_constraints_fun(res.x) is True): best_acquisition_value = res.fun best_x = res.x @@ -134,13 +142,15 @@ def next_hyperparameter_lowest_confidence(fun_prediction, 'expected_sigma': sigma, 'acquisition_func': "lc"} return outputs + def _lowest_confidence(x, fun_prediction, fun_prediction_args, x_bounds, x_types, minimize_constraints_fun): # This is only for step-wise optimization x = lib_data.match_val_type(x, x_bounds, x_types) ci = sys.maxsize - if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True): + if (minimize_constraints_fun is None) or ( + minimize_constraints_fun(x) is True): mu, sigma = fun_prediction(x, *fun_prediction_args) ci = (sigma * 1.96 * 2) / mu # We want ci to be as large as possible @@ -156,9 +166,9 @@ def next_hyperparameter_lowest_mu(fun_prediction, x_bounds, x_types, minimize_starting_points, minimize_constraints_fun=None): - ''' + 
""" "Lowest Mu" acquisition function - ''' + """ best_x = None best_acquisition_value = None x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds] @@ -169,13 +179,15 @@ def next_hyperparameter_lowest_mu(fun_prediction, x0=starting_point.reshape(1, -1), bounds=x_bounds_minmax, method="L-BFGS-B", - args=(fun_prediction, fun_prediction_args, \ + args=(fun_prediction, fun_prediction_args, x_bounds, x_types, minimize_constraints_fun)) - if (best_acquisition_value is None) or (res.fun < best_acquisition_value): + if (best_acquisition_value is None) or ( + res.fun < best_acquisition_value): res.x = numpy.ndarray.tolist(res.x) res.x = lib_data.match_val_type(res.x, x_bounds, x_types) - if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True): + if (minimize_constraints_fun is None) or ( + minimize_constraints_fun(res.x) is True): best_acquisition_value = res.fun best_x = res.x @@ -189,14 +201,14 @@ def next_hyperparameter_lowest_mu(fun_prediction, def _lowest_mu(x, fun_prediction, fun_prediction_args, x_bounds, x_types, minimize_constraints_fun): - ''' + """ Calculate the lowest mu - ''' + """ # This is only for step-wise optimization x = lib_data.match_val_type(x, x_bounds, x_types) mu = sys.maxsize - if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True): + if (minimize_constraints_fun is None) or ( + minimize_constraints_fun(x) is True): mu, _ = fun_prediction(x, *fun_prediction_args) return mu - \ No newline at end of file diff --git a/src/sdk/pynni/nni/metis_tuner/lib_constraint_summation.py b/src/sdk/pynni/nni/metis_tuner/lib_constraint_summation.py index 1e9daaee95..cc385e9afc 100644 --- a/src/sdk/pynni/nni/metis_tuner/lib_constraint_summation.py +++ b/src/sdk/pynni/nni/metis_tuner/lib_constraint_summation.py @@ -16,7 +16,11 @@ # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+""" +lib_constraint_summation.py +""" import math import random @@ -39,6 +43,7 @@ def check_feasibility(x_bounds, lowerbound, upperbound): return (x_bounds_lowerbound <= lowerbound <= x_bounds_upperbound) or \ (x_bounds_lowerbound <= upperbound <= x_bounds_upperbound) + def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): ''' Key idea is that we try to move towards upperbound, by randomly choose one @@ -55,7 +60,8 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): if x_types[i] == "discrete_int": x_idx_sorted.append([i, len(x_bounds[i])]) elif (x_types[i] == "range_int") or (x_types[i] == "range_continuous"): - x_idx_sorted.append([i, math.floor(x_bounds[i][1] - x_bounds[i][0])]) + x_idx_sorted.append( + [i, math.floor(x_bounds[i][1] - x_bounds[i][0])]) x_idx_sorted = sorted(x_idx_sorted, key=itemgetter(1)) for _ in range(max_retries): @@ -77,12 +83,13 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): temp.append(j) # Randomly pick a number from the integer array if temp: - outputs[x_idx] = temp[random.randint(0, len(temp) - 1)] + outputs[x_idx] = temp[random.randint( + 0, len(temp) - 1)] elif (x_types[x_idx] == "range_int") or \ - (x_types[x_idx] == "range_continuous"): - outputs[x_idx] = random.randint(x_bounds[x_idx][0], - min(x_bounds[x_idx][-1], budget_max)) + (x_types[x_idx] == "range_continuous"): + outputs[x_idx] = random.randint( + x_bounds[x_idx][0], min(x_bounds[x_idx][-1], budget_max)) else: # The last x that we need to assign a random number @@ -91,26 +98,28 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): # This check: # is our smallest possible value going to overflow the available budget space, - # and is our largest possible value going to underflow the lower bound + # and is our largest possible value going to underflow the + # lower bound if (x_bounds[x_idx][0] <= budget_max) and \ (x_bounds[x_idx][-1] >= randint_lowerbound): if x_types[x_idx] == "discrete_int": temp = [] for j in x_bounds[x_idx]: - # if (j <= budget_max) and (j >= randint_lowerbound): + # if (j <= budget_max) and (j >= + # randint_lowerbound): if randint_lowerbound <= j <= budget_max: temp.append(j) if temp: - outputs[x_idx] = temp[random.randint(0, len(temp) - 1)] + outputs[x_idx] = temp[random.randint( + 0, len(temp) - 1)] elif (x_types[x_idx] == "range_int") or \ (x_types[x_idx] == "range_continuous"): - outputs[x_idx] = random.randint(randint_lowerbound, - min(x_bounds[x_idx][1], budget_max)) + outputs[x_idx] = random.randint( + randint_lowerbound, min( + x_bounds[x_idx][1], budget_max)) if outputs[x_idx] is None: break - else: - budget_allocated += outputs[x_idx] + budget_allocated += outputs[x_idx] if None not in outputs: break return outputs - \ No newline at end of file diff --git a/src/sdk/pynni/nni/metis_tuner/lib_data.py b/src/sdk/pynni/nni/metis_tuner/lib_data.py index 6256dfc69a..0ff6dbdc55 100644 --- a/src/sdk/pynni/nni/metis_tuner/lib_data.py +++ b/src/sdk/pynni/nni/metis_tuner/lib_data.py @@ -16,7 +16,8 @@ # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
 import math
 import random
@@ -56,7 +57,7 @@ def rand(x_bounds, x_types):
             temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
             outputs.append(temp)
         elif x_types[i] == "range_int":
-            temp = random.randint(x_bounds[i][0], x_bounds[i][1] -1)
+            temp = random.randint(x_bounds[i][0], x_bounds[i][1] - 1)
             outputs.append(temp)
         elif x_types[i] == "range_continuous":
             temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
diff --git a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
index d9ac3415fb..600cca99b5 100644
--- a/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
+++ b/src/sdk/pynni/nni/metis_tuner/metis_tuner.py
@@ -16,7 +16,11 @@
 # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+"""
+metis_tuner.py
+"""

 import copy
 import logging
@@ -51,10 +55,45 @@ class MetisTuner(Tuner):

     More algorithm information you could reference here:
     https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
+
+    Attributes
+    ----------
+    optimize_mode : str
+        optimize_mode is a string including two modes: "maximize" and "minimize"
+
+    no_resampling : bool
+        True or False.
+        Should Metis consider re-sampling as part of the search strategy?
+        If you are confident that the training dataset is noise-free,
+        then you do not need re-sampling.
+
+    no_candidates : bool
+        True or False.
+        Should Metis suggest parameters for the next benchmark?
+        If you do not plan to do more benchmarks,
+        Metis can skip this step.
+
+    selection_num_starting_points : int
+        How many times Metis should try to find the global optimum in the search space.
+        The higher the number, the longer it takes to output the solution.
+
+    cold_start_num : int
+        Metis needs some trial results to get a cold start.
+        When the number of trial results is less than
+        cold_start_num, Metis will randomly sample hyper-parameters for trials.
+
+    exploration_probability : float
+        The probability that Metis selects parameters from exploration instead of exploitation.
     """

-    def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=False,
-                 selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.9):
+    def __init__(
+            self,
+            optimize_mode="maximize",
+            no_resampling=True,
+            no_candidates=False,
+            selection_num_starting_points=600,
+            cold_start_num=10,
+            exploration_probability=0.9):
         """
         Parameters
         ----------
@@ -62,23 +101,34 @@ def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=F
         optimize_mode : str
             optimize_mode is a string that including two mode "maximize" and "minimize"

         no_resampling : bool
-            True or False. Should Metis consider re-sampling as part of the search strategy?
-            If you are confident that the training dataset is noise-free, then you do not need re-sampling.
-
-        no_candidates: bool
-            True or False. Should Metis suggest parameters for the next benchmark?
-            If you do not plan to do more benchmarks, Metis can skip this step.
-
-        selection_num_starting_points: int
-            how many times Metis should try to find the global optimal in the search space?
-            The higher the number, the longer it takes to output the solution.
+            True or False.
+            Should Metis consider re-sampling as part of the search strategy?
+            If you are confident that the training dataset is noise-free,
+            then you do not need re-sampling.
+
+        no_candidates : bool
+            True or False.
+            Should Metis suggest parameters for the next benchmark?
+            If you do not plan to do more benchmarks,
+            Metis can skip this step.
+
+        selection_num_starting_points : int
+            How many times Metis should try to find the global optimum in the search space.
+            The higher the number, the longer it takes to output the solution.
+
+        cold_start_num : int
+            Metis needs some trial results to get a cold start.
+            When the number of trial results is less than
+            cold_start_num, Metis will randomly sample hyper-parameters for trials.
+
+        exploration_probability : float
+            The probability that Metis selects parameters from exploration instead of exploitation.

-        cold_start_num: int
-            Metis need some trial result to get cold start. when the number of trial result is less than
-            cold_start_num, Metis will randomly sample hyper-parameter for trial.
+        x_bounds : list
+            The constraints of parameters.

-        exploration_probability: float
-            The probability of Metis to select parameter from exploration instead of exploitation.
+        x_types : list
+            The types of parameters.
         """

         self.samples_x = []
@@ -101,7 +151,8 @@ def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=F


     def update_search_space(self, search_space):
-        """Update the self.x_bounds and self.x_types by the search_space.json
+        """
+        Update the self.x_bounds and self.x_types by the search_space.json

         Parameters
         ----------
@@ -120,12 +171,20 @@ def update_search_space(self, search_space):
                 key_range = search_space[key]['_value']
                 idx = self.key_order.index(key)
                 if key_type == 'quniform':
-                    if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer():
-                        self.x_bounds[idx] = [key_range[0], key_range[1]+1]
+                    if key_range[2] == 1 and key_range[0].is_integer(
+                    ) and key_range[1].is_integer():
+                        self.x_bounds[idx] = [key_range[0], key_range[1] + 1]
                         self.x_types[idx] = 'range_int'
                     else:
                         low, high, q = key_range
-                        bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
+                        bounds = np.clip(
+                            np.arange(
+                                np.round(
+                                    low / q),
+                                np.round(
+                                    high / q) + 1) * q,
+                            low,
+                            high)
                         self.x_bounds[idx] = bounds
                         self.x_types[idx] = 'discrete_int'
                 elif key_type == 'randint':
@@ -139,22 +198,28 @@ def update_search_space(self, search_space):

                     for key_value in key_range:
                         if not isinstance(key_value, (int, float)):
-                            raise RuntimeError("Metis Tuner only support numerical choice.")
+                            raise RuntimeError(
+                                "Metis Tuner only supports numerical choice.")

                     self.x_types[idx] = 'discrete_int'
                 else:
-                    logger.info("Metis Tuner doesn't support this kind of variable: %s", key_type)
-                    raise RuntimeError("Metis Tuner doesn't support this kind of variable: " + str(key_type))
+                    logger.info(
+                        "Metis Tuner doesn't support this kind of variable: %s",
+                        str(key_type))
+                    raise RuntimeError(
+                        "Metis Tuner doesn't support this kind of variable: %s" %
+                        str(key_type))
         else:
             logger.info("The format of search space is not a dict.")
             raise RuntimeError("The format of search space is not a dict.")

-        self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
-                                                   self.selection_num_starting_points)
+        self.minimize_starting_points = _rand_init(
+            self.x_bounds, self.x_types, self.selection_num_starting_points)


     def _pack_output(self, init_parameter):
-        """Pack the output
+        """
+        Pack the output

         Parameters
         ----------
@@ -167,14 +232,18 @@ def _pack_output(self, init_parameter):
         output = {}
         for i, param in enumerate(init_parameter):
             output[self.key_order[i]] = param
+
         return output

     def generate_parameters(self, parameter_id, **kwargs):
-        """Generate next parameter for trial
+        """
+        Generate next parameter for trial
+
         If the number of trial result is lower than cold start number,
         metis will first random generate some parameters.
-        Otherwise, metis will choose the parameters by the Gussian Process Model and the Gussian Mixture Model.
+        Otherwise, metis will choose the parameters by
+        the Gaussian Process Model and the Gaussian Mixture Model.

         Parameters
         ----------
@@ -188,26 +257,34 @@ def generate_parameters(self, parameter_id, **kwargs):
             init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
             results = self._pack_output(init_parameter)
         else:
-            self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
-                                                       self.selection_num_starting_points)
-            results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
-                                      self.x_bounds, self.x_types,
-                                      threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
-                                      no_candidates=self.no_candidates,
-                                      minimize_starting_points=self.minimize_starting_points,
-                                      minimize_constraints_fun=self.minimize_constraints_fun)
-
-        logger.info("Generate paramageters:\n%s", results)
+            self.minimize_starting_points = _rand_init(
+                self.x_bounds, self.x_types, self.selection_num_starting_points)
+            results = self._selection(
+                self.samples_x,
+                self.samples_y_aggregation,
+                self.samples_y,
+                self.x_bounds,
+                self.x_types,
+                threshold_samplessize_resampling=(
+                    None if self.no_resampling is True else 50),
+                no_candidates=self.no_candidates,
+                minimize_starting_points=self.minimize_starting_points,
+                minimize_constraints_fun=self.minimize_constraints_fun)
+
+        logger.info("Generate parameters: \n%s", str(results))
         return results

     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
-        """Tuner receive result from trial.
+        """
+        Tuner receive result from trial.

         Parameters
         ----------
         parameter_id : int
+            The id of parameters, generated by nni manager.
         parameters : dict
+            A group of parameters that the trial has tried.
         value : dict/float
             if value is dict, it should have "default" key.
""" @@ -216,8 +293,8 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs): value = -value logger.info("Received trial result.") - logger.info("value is :%s", value) - logger.info("parameter is : %s", parameters) + logger.info("value is : %s", str(value)) + logger.info("parameter is : %s", str(parameters)) # parse parameter to sample_x sample_x = [0 for i in range(len(self.key_order))] @@ -244,11 +321,19 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs): self.samples_y_aggregation.append([value]) - def _selection(self, samples_x, samples_y_aggregation, samples_y, - x_bounds, x_types, max_resampling_per_x=3, - threshold_samplessize_exploitation=12, - threshold_samplessize_resampling=50, no_candidates=False, - minimize_starting_points=None, minimize_constraints_fun=None): + def _selection( + self, + samples_x, + samples_y_aggregation, + samples_y, + x_bounds, + x_types, + max_resampling_per_x=3, + threshold_samplessize_exploitation=12, + threshold_samplessize_resampling=50, + no_candidates=False, + minimize_starting_points=None, + minimize_constraints_fun=None): with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -259,7 +344,8 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, samples_size_unique = len(samples_y) # ===== STEP 1: Compute the current optimum ===== - gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation) + gp_model = gp_create_model.create_model( + samples_x, samples_y_aggregation) lm_current = gp_selection.selection( "lm", samples_y_aggregation, @@ -278,7 +364,7 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, }) if no_candidates is False: - # ===== STEP 2: Get recommended configurations for exploration ===== + # ===== STEP 2: Get recommended configurations for exploration ==== results_exploration = gp_selection.selection( "lc", samples_y_aggregation, @@ -303,25 +389,31 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, else: logger.info("DEBUG: No suitable exploration candidates were") - # ===== STEP 3: Get recommended configurations for exploitation ===== + # ===== STEP 3: Get recommended configurations for exploitation === if samples_size_all >= threshold_samplessize_exploitation: logger.info("Getting candidates for exploitation...\n") try: - gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation) + gmm = gmm_create_model.create_model( + samples_x, samples_y_aggregation) if ("discrete_int" in x_types) or ("range_int" in x_types): - results_exploitation = gmm_selection.selection(x_bounds, x_types, - gmm['clusteringmodel_good'], - gmm['clusteringmodel_bad'], - minimize_starting_points, - minimize_constraints_fun=minimize_constraints_fun) + results_exploitation = gmm_selection.selection( + x_bounds, + x_types, + gmm['clusteringmodel_good'], + gmm['clusteringmodel_bad'], + minimize_starting_points, + minimize_constraints_fun=minimize_constraints_fun) else: - # If all parameters are of "range_continuous", let's use GMM to generate random starting points - results_exploitation = gmm_selection.selection_r(x_bounds, x_types, - gmm['clusteringmodel_good'], - gmm['clusteringmodel_bad'], - num_starting_points=self.selection_num_starting_points, - minimize_constraints_fun=minimize_constraints_fun) + # If all parameters are of "range_continuous", + # let's use GMM to generate random starting points + results_exploitation = gmm_selection.selection_r( + x_bounds, + x_types, + gmm['clusteringmodel_good'], + gmm['clusteringmodel_bad'], + 
num_starting_points=self.selection_num_starting_points, + minimize_constraints_fun=minimize_constraints_fun) if results_exploitation is not None: if _num_past_samples(results_exploitation['hyperparameter'], samples_x, samples_y) == 0: @@ -335,24 +427,30 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, } candidates.append(temp_candidate) - logger.info("DEBUG: 1 exploitation_gmm candidate selected\n") + logger.info( + "DEBUG: 1 exploitation_gmm candidate selected\n") logger.info(temp_candidate) else: - logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n") + logger.info( + "DEBUG: No suitable exploitation_gmm candidates were found\n") except ValueError as exception: # The exception: ValueError: Fitting the mixture model failed # because some components have ill-defined empirical covariance # (for instance caused by singleton or collapsed samples). - # Try to decrease the number of components, or increase reg_covar. - logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.") + # Try to decrease the number of components, or increase + # reg_covar. + logger.info( + "DEBUG: No suitable exploitation_gmm \ + candidates were found due to exception.") logger.info(exception) # ===== STEP 4: Get a list of outliers ===== if (threshold_samplessize_resampling is not None) and \ - (samples_size_unique >= threshold_samplessize_resampling): + (samples_size_unique >= threshold_samplessize_resampling): logger.info("Getting candidates for re-sampling...\n") - results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation) + results_outliers = gp_outlier_detection.outlierDetection_threaded( + samples_x, samples_y_aggregation) if results_outliers is not None: for results_outlier in results_outliers: # pylint: disable=not-an-iterable @@ -365,11 +463,13 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, logger.info("DEBUG: %d re-sampling candidates selected\n") logger.info(temp_candidate) else: - logger.info("DEBUG: No suitable resampling candidates were found\n") + logger.info( + "DEBUG: No suitable resampling candidates were found\n") if candidates: - # ===== STEP 5: Compute the information gain of each candidate towards the optimum ===== - logger.info("Evaluating information gain of %d candidates...\n") + # ===== STEP 5: Compute the information gain of each candidate + logger.info( + "Evaluating information gain of %d candidates...\n") next_improvement = 0 threads_inputs = [[ @@ -377,36 +477,45 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, minimize_constraints_fun, minimize_starting_points ] for candidate in candidates] threads_pool = ThreadPool(4) - # Evaluate what would happen if we actually sample each candidate - threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs) + # Evaluate what would happen if we actually sample each + # candidate + threads_results = threads_pool.map( + _calculate_lowest_mu_threaded, threads_inputs) threads_pool.close() threads_pool.join() for threads_result in threads_results: if threads_result['expected_lowest_mu'] < lm_current['expected_mu']: # Information gain - temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu'] + temp_improvement = threads_result['expected_lowest_mu'] - \ + lm_current['expected_mu'] if next_improvement > temp_improvement: next_improvement = temp_improvement next_candidate = threads_result['candidate'] else: - # ===== STEP 6: If we have no candidates, 
randomly pick one ===== + # ===== STEP 6: If we have no candidates, randomly pick one === logger.info( "DEBUG: No candidates from exploration, exploitation,\ and resampling. We will random a candidate for next_candidate\n" ) - next_candidate = _rand_with_constraints(x_bounds, x_types) \ - if minimize_starting_points is None else minimize_starting_points[0] - next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types) - expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model']) - next_candidate = {'hyperparameter': next_candidate, 'reason': "random", - 'expected_mu': expected_mu, 'expected_sigma': expected_sigma} - - # ===== STEP 7 ===== - # If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, - # take next config as exploration step + next_candidate = _rand_with_constraints( + x_bounds, + x_types) if minimize_starting_points is None else minimize_starting_points[0] + next_candidate = lib_data.match_val_type( + next_candidate, x_bounds, x_types) + expected_mu, expected_sigma = gp_prediction.predict( + next_candidate, gp_model['model']) + next_candidate = { + 'hyperparameter': next_candidate, + 'reason': "random", + 'expected_mu': expected_mu, + 'expected_sigma': expected_sigma} + + # STEP 7: If current optimal hyperparameter occurs in the history + # or exploration probability is less than the threshold, take next + # config as exploration step outputs = self._pack_output(lm_current['hyperparameter']) ap = random.uniform(0, 1) if outputs in self.total_data or ap <= self.exploration_probability: @@ -419,11 +528,13 @@ def _selection(self, samples_x, samples_y_aggregation, samples_y, return outputs def import_data(self, data): - """Import additional data for tuning + """ + Import additional data for tuning + Parameters ---------- - data: - a list of dictionarys, each of which has at least two keys, 'parameter' and 'value' + data : a list of dict + each of which has at least two keys: 'parameter' and 'value'. 
""" _completed_num = 0 for trial_info in data: @@ -437,18 +548,26 @@ def import_data(self, data): logger.info("Useless trial data, value is %s, skip this trial data.", _value) continue self.supplement_data_num += 1 - _parameter_id = '_'.join(["ImportData", str(self.supplement_data_num)]) + _parameter_id = '_'.join( + ["ImportData", str(self.supplement_data_num)]) self.total_data.append(_params) - self.receive_trial_result(parameter_id=_parameter_id, parameters=_params, value=_value) + self.receive_trial_result( + parameter_id=_parameter_id, + parameters=_params, + value=_value) logger.info("Successfully import data to metis tuner.") + def _rand_with_constraints(x_bounds, x_types): outputs = None x_bounds_withconstraints = [x_bounds[i] for i in CONSTRAINT_PARAMS_IDX] x_types_withconstraints = [x_types[i] for i in CONSTRAINT_PARAMS_IDX] - x_val_withconstraints = lib_constraint_summation.rand(x_bounds_withconstraints,\ - x_types_withconstraints, CONSTRAINT_LOWERBOUND, CONSTRAINT_UPPERBOUND) + x_val_withconstraints = lib_constraint_summation.rand( + x_bounds_withconstraints, + x_types_withconstraints, + CONSTRAINT_LOWERBOUND, + CONSTRAINT_UPPERBOUND) if not x_val_withconstraints: outputs = [None] * len(x_bounds) @@ -462,12 +581,18 @@ def _rand_with_constraints(x_bounds, x_types): def _calculate_lowest_mu_threaded(inputs): - [candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] = inputs + [candidate, samples_x, samples_y, x_bounds, x_types, + minimize_constraints_fun, minimize_starting_points] = inputs outputs = {"candidate": candidate, "expected_lowest_mu": None} - for expected_mu in [candidate['expected_mu'] + 1.96 * candidate['expected_sigma'], - candidate['expected_mu'] - 1.96 * candidate['expected_sigma']]: + for expected_mu in [ + candidate['expected_mu'] + + 1.96 * + candidate['expected_sigma'], + candidate['expected_mu'] - + 1.96 * + candidate['expected_sigma']]: temp_samples_x = copy.deepcopy(samples_x) temp_samples_y = copy.deepcopy(samples_y) @@ -480,8 +605,10 @@ def _calculate_lowest_mu_threaded(inputs): temp_samples_y.append([expected_mu]) # Aggregates multiple observation of the sample sampling points - temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y] - temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation) + temp_y_aggregation = [statistics.median( + temp_sample_y) for temp_sample_y in temp_samples_y] + temp_gp = gp_create_model.create_model( + temp_samples_x, temp_y_aggregation) temp_results = gp_selection.selection( "lm", temp_y_aggregation, @@ -491,7 +618,8 @@ def _calculate_lowest_mu_threaded(inputs): minimize_starting_points, minimize_constraints_fun=minimize_constraints_fun) - if outputs["expected_lowest_mu"] is None or outputs["expected_lowest_mu"] > temp_results['expected_mu']: + if outputs["expected_lowest_mu"] is None \ + or outputs["expected_lowest_mu"] > temp_results['expected_mu']: outputs["expected_lowest_mu"] = temp_results['expected_mu'] return outputs @@ -510,18 +638,19 @@ def _rand_init(x_bounds, x_types, selection_num_starting_points): ''' Random sample some init seed within bounds. 
''' - return [lib_data.rand(x_bounds, x_types) for i \ - in range(0, selection_num_starting_points)] + return [lib_data.rand(x_bounds, x_types) for i + in range(0, selection_num_starting_points)] def get_median(temp_list): - """Return median + """ + Return median """ num = len(temp_list) temp_list.sort() print(temp_list) if num % 2 == 0: - median = (temp_list[int(num/2)] + temp_list[int(num/2) - 1]) / 2 + median = (temp_list[int(num / 2)] + temp_list[int(num / 2) - 1]) / 2 else: - median = temp_list[int(num/2)] + median = temp_list[int(num / 2)] return median diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/bayesian.py b/src/sdk/pynni/nni/networkmorphism_tuner/bayesian.py index 360771139a..15c5e83cdd 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/bayesian.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/bayesian.py @@ -38,7 +38,7 @@ def layer_distance(a, b): """The distance between two layers.""" # pylint: disable=unidiomatic-typecheck - if type(a) != type(b): + if not isinstance(a, type(b)): return 1.0 if is_layer(a, "Conv"): att_diff = [ @@ -96,7 +96,8 @@ def skip_connection_distance(a, b): return 1.0 len_a = abs(a[1] - a[0]) len_b = abs(b[1] - b[0]) - return (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b)) + return (abs(a[0] - b[0]) + abs(len_a - len_b)) / \ + (max(a[0], b[0]) + max(len_a, len_b)) def skip_connections_distance(list_a, list_b): @@ -161,7 +162,8 @@ def fit(self, train_x, train_y): def incremental_fit(self, train_x, train_y): """ Incrementally fit the regressor. """ if not self._first_fitted: - raise ValueError("The first_fit function needs to be called first.") + raise ValueError( + "The first_fit function needs to be called first.") train_x, train_y = np.array(train_x), np.array(train_y) @@ -174,7 +176,7 @@ def incremental_fit(self, train_x, train_y): temp_distance_matrix = np.concatenate((up_k, down_k), axis=0) k_matrix = bourgain_embedding_matrix(temp_distance_matrix) diagonal = np.diag_indices_from(k_matrix) - diagonal = (diagonal[0][-len(train_x) :], diagonal[1][-len(train_x) :]) + diagonal = (diagonal[0][-len(train_x):], diagonal[1][-len(train_x):]) k_matrix[diagonal] += self.alpha try: @@ -186,7 +188,8 @@ def incremental_fit(self, train_x, train_y): self._y = np.concatenate((self._y, train_y), axis=0) self._distance_matrix = temp_distance_matrix - self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3 + self._alpha_vector = cho_solve( + (self._l_matrix, True), self._y) # Line 3 return self @@ -209,7 +212,8 @@ def first_fit(self, train_x, train_y): self._l_matrix = cholesky(k_matrix, lower=True) # Line 2 - self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3 + self._alpha_vector = cho_solve( + (self._l_matrix, True), self._y) # Line 3 self._first_fitted = True return self @@ -227,7 +231,9 @@ def predict(self, train_x): # compute inverse K_inv of K based on its Cholesky # decomposition L and its inverse L_inv - l_inv = solve_triangular(self._l_matrix.T, np.eye(self._l_matrix.shape[0])) + l_inv = solve_triangular( + self._l_matrix.T, np.eye( + self._l_matrix.shape[0])) k_inv = l_inv.dot(l_inv.T) # Compute variance of predictive distribution y_var = np.ones(len(train_x), dtype=np.float) @@ -378,7 +384,11 @@ def generate(self, descriptors): continue temp_acq_value = self.acq(temp_graph) - pq.put(elem_class(temp_acq_value, elem.father_id, temp_graph)) + pq.put( + elem_class( + temp_acq_value, + elem.father_id, + temp_graph)) descriptors.append(temp_graph.extract_descriptor()) if 
self._accept_new_acq_value(opt_acq, temp_acq_value): opt_acq = temp_acq_value diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/graph.py b/src/sdk/pynni/nni/networkmorphism_tuner/graph.py index 3d951a1965..abf825c48a 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/graph.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/graph.py @@ -249,7 +249,8 @@ def _redirect_edge(self, u_id, v_id, new_v_id): self.reverse_adj_list[v_id].remove(edge_tuple) break self.reverse_adj_list[new_v_id].append((u_id, layer_id)) - for index, value in enumerate(self.layer_id_to_output_node_ids[layer_id]): + for index, value in enumerate( + self.layer_id_to_output_node_ids[layer_id]): if value == v_id: self.layer_id_to_output_node_ids[layer_id][index] = new_v_id break @@ -350,7 +351,8 @@ def _search(self, u, start_dim, total_dim, n_add): self._replace_layer(layer_id, new_layer) elif is_layer(layer, "BatchNormalization"): - new_layer = wider_bn(layer, start_dim, total_dim, n_add, self.weighted) + new_layer = wider_bn( + layer, start_dim, total_dim, n_add, self.weighted) self._replace_layer(layer_id, new_layer) self._search(v, start_dim, total_dim, n_add) @@ -405,7 +407,8 @@ def to_deeper_model(self, target_id, new_layer): target_id: A convolutional layer ID. The new block should be inserted after the block. new_layer: An instance of StubLayer subclasses. """ - self.operation_history.append(("to_deeper_model", target_id, new_layer)) + self.operation_history.append( + ("to_deeper_model", target_id, new_layer)) input_id = self.layer_id_to_input_node_ids[target_id][0] output_id = self.layer_id_to_output_node_ids[target_id][0] if self.weighted: @@ -478,14 +481,20 @@ def to_add_skip_model(self, start_id, end_id): pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0] end_node_id = self.layer_id_to_output_node_ids[end_id][0] - skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id) + skip_output_id = self._insert_pooling_layer_chain( + start_node_id, end_node_id) # Add the conv layer - new_conv_layer = get_conv_class(self.n_dim)(filters_start, filters_end, 1) + new_conv_layer = get_conv_class( + self.n_dim)( + filters_start, + filters_end, + 1) skip_output_id = self.add_layer(new_conv_layer, skip_output_id) # Add the add layer. - add_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id])) + add_input_node_id = self._add_node( + deepcopy(self.node_list[end_node_id])) add_layer = StubAdd() self._redirect_edge(pre_end_node_id, end_node_id, add_input_node_id) @@ -504,7 +513,8 @@ def to_add_skip_model(self, start_id, end_id): weights = np.zeros((filters_end, filters_start) + filter_shape) bias = np.zeros(filters_end) new_conv_layer.set_weights( - (add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) + (add_noise(weights, np.array([0, 1])), add_noise( + bias, np.array([0, 1]))) ) def to_concat_skip_model(self, start_id, end_id): @@ -513,7 +523,8 @@ def to_concat_skip_model(self, start_id, end_id): start_id: The convolutional layer ID, after which to start the skip-connection. end_id: The convolutional layer ID, after which to end the skip-connection. 
""" - self.operation_history.append(("to_concat_skip_model", start_id, end_id)) + self.operation_history.append( + ("to_concat_skip_model", start_id, end_id)) filters_end = self.layer_list[end_id].output.shape[-1] filters_start = self.layer_list[start_id].output.shape[-1] start_node_id = self.layer_id_to_output_node_ids[start_id][0] @@ -521,9 +532,11 @@ def to_concat_skip_model(self, start_id, end_id): pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0] end_node_id = self.layer_id_to_output_node_ids[end_id][0] - skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id) + skip_output_id = self._insert_pooling_layer_chain( + start_node_id, end_node_id) - concat_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id])) + concat_input_node_id = self._add_node( + deepcopy(self.node_list[end_node_id])) self._redirect_edge(pre_end_node_id, end_node_id, concat_input_node_id) concat_layer = StubConcatenate() @@ -532,7 +545,10 @@ def to_concat_skip_model(self, start_id, end_id): self.node_list[skip_output_id], ] concat_output_node_id = self._add_node(Node(concat_layer.output_shape)) - self._add_edge(concat_layer, concat_input_node_id, concat_output_node_id) + self._add_edge( + concat_layer, + concat_input_node_id, + concat_output_node_id) self._add_edge(concat_layer, skip_output_id, concat_output_node_id) concat_layer.output = self.node_list[concat_output_node_id] self.node_list[concat_output_node_id].shape = concat_layer.output_shape @@ -559,7 +575,8 @@ def to_concat_skip_model(self, start_id, end_id): ) bias = np.zeros(filters_end) new_conv_layer.set_weights( - (add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) + (add_noise(weights, np.array([0, 1])), add_noise( + bias, np.array([0, 1]))) ) def _insert_pooling_layer_chain(self, start_node_id, end_node_id): @@ -568,7 +585,8 @@ def _insert_pooling_layer_chain(self, start_node_id, end_node_id): new_layer = deepcopy(layer) if is_layer(new_layer, "Conv"): filters = self.node_list[start_node_id].shape[-1] - new_layer = get_conv_class(self.n_dim)(filters, filters, 1, layer.stride) + new_layer = get_conv_class(self.n_dim)( + filters, filters, 1, layer.stride) if self.weighted: init_conv_weight(new_layer) else: @@ -601,8 +619,10 @@ def extract_descriptor(self): temp_v = v temp_layer_id = layer_id skip_type = None - while not (temp_v in index_in_main_chain and temp_u in index_in_main_chain): - if is_layer(self.layer_list[temp_layer_id], "Concatenate"): + while not ( + temp_v in index_in_main_chain and temp_u in index_in_main_chain): + if is_layer( + self.layer_list[temp_layer_id], "Concatenate"): skip_type = NetworkDescriptor.CONCAT_CONNECT if is_layer(self.layer_list[temp_layer_id], "Add"): skip_type = NetworkDescriptor.ADD_CONNECT @@ -711,7 +731,8 @@ def deep_layer_ids(self): def wide_layer_ids(self): return ( - self._conv_layer_ids_in_order()[:-1] + self._dense_layer_ids_in_order()[:-1] + self._conv_layer_ids_in_order( + )[:-1] + self._dense_layer_ids_in_order()[:-1] ) def skip_connection_layer_ids(self): @@ -810,7 +831,8 @@ def __init__(self, graph): topo_node_list = self.graph.topological_order output_id = topo_node_list[-1] input_id = topo_node_list[0] - input_tensor = keras.layers.Input(shape=graph.node_list[input_id].shape) + input_tensor = keras.layers.Input( + shape=graph.node_list[input_id].shape) node_list = deepcopy(self.graph.node_list) node_list[input_id] = input_tensor @@ -838,7 +860,8 @@ def __init__(self, graph): output_tensor = keras.layers.Activation("softmax", 
name="activation_add")( output_tensor ) - self.model = keras.models.Model(inputs=input_tensor, outputs=output_tensor) + self.model = keras.models.Model( + inputs=input_tensor, outputs=output_tensor) if graph.weighted: for index, layer in enumerate(self.layers): @@ -892,7 +915,8 @@ def __init__(self, graph): for layer_id, item in enumerate(graph.layer_list): layer = graph.layer_list[layer_id] - layer_information = layer_description_extractor(layer, graph.node_to_id) + layer_information = layer_description_extractor( + layer, graph.node_to_id) layer_list.append((layer_id, layer_information)) data["node_list"] = node_list @@ -938,7 +962,8 @@ def json_to_graph(json_model: str): graph.input_shape = input_shape vis = json_model["vis"] - graph.vis = {tuple(item): True for item in vis} if vis is not None else None + graph.vis = { + tuple(item): True for item in vis} if vis is not None else None graph.weighted = json_model["weighted"] layer_id_to_input_node_ids = json_model["layer_id_to_input_node_ids"] graph.layer_id_to_input_node_ids = { diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/graph_transformer.py b/src/sdk/pynni/nni/networkmorphism_tuner/graph_transformer.py index a318188f3e..6b36e8ed97 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/graph_transformer.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/graph_transformer.py @@ -40,7 +40,8 @@ def to_wider_graph(graph): ''' weighted_layer_ids = graph.wide_layer_ids() weighted_layer_ids = list( - filter(lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids) + filter( + lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids) ) wider_layers = sample(weighted_layer_ids, 1) @@ -58,12 +59,14 @@ def to_wider_graph(graph): def to_skip_connection_graph(graph): ''' skip connection graph ''' - # The last conv layer cannot be widen since wider operator cannot be done over the two sides of flatten. + # The last conv layer cannot be widen since wider operator cannot be done + # over the two sides of flatten. 
weighted_layer_ids = graph.skip_connection_layer_ids() valid_connection = [] - for skip_type in sorted([NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]): + for skip_type in sorted( + [NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]): for index_a in range(len(weighted_layer_ids)): - for index_b in range(len(weighted_layer_ids))[index_a + 1 :]: + for index_b in range(len(weighted_layer_ids))[index_a + 1:]: valid_connection.append((index_a, index_b, skip_type)) if not valid_connection: @@ -84,9 +87,14 @@ def create_new_layer(layer, n_dim): input_shape = layer.output.shape dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU] - conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim), StubReLU] + conv_deeper_classes = [ + get_conv_class(n_dim), + get_batch_norm_class(n_dim), + StubReLU] if is_layer(layer, "ReLU"): - conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim)] + conv_deeper_classes = [ + get_conv_class(n_dim), + get_batch_norm_class(n_dim)] dense_deeper_classes = [StubDense, get_dropout_class(n_dim)] elif is_layer(layer, "Dropout"): dense_deeper_classes = [StubDense, StubReLU] diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/layer_transformer.py b/src/sdk/pynni/nni/networkmorphism_tuner/layer_transformer.py index bd580a1cab..ab6d275fbe 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/layer_transformer.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/layer_transformer.py @@ -52,7 +52,8 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True): if weighted: new_conv_layer.set_weights( - (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) + (add_noise(weight, np.array([0, 1])), + add_noise(bias, np.array([0, 1]))) ) new_weights = [ add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])), @@ -74,7 +75,8 @@ def dense_to_deeper_block(dense_layer, weighted=True): new_dense_layer = StubDense(units, units) if weighted: new_dense_layer.set_weights( - (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) + (add_noise(weight, np.array([0, 1])), + add_noise(bias, np.array([0, 1]))) ) return [StubReLU(), new_dense_layer] @@ -97,8 +99,11 @@ def wider_pre_dense(layer, n_add, weighted=True): teacher_index = rand[i] new_weight = teacher_w[teacher_index, :] new_weight = new_weight[np.newaxis, :] - student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), axis=0) - student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b)) + student_w = np.concatenate( + (student_w, add_noise(new_weight, student_w)), axis=0) + student_b = np.append( + student_b, add_noise( + teacher_b[teacher_index], student_b)) new_pre_layer = StubDense(layer.input_units, n_units2 + n_add) new_pre_layer.set_weights((student_w, student_b)) @@ -209,7 +214,7 @@ def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True): student_w[:, : start_dim * n_units_each_channel], add_noise(new_weight, student_w), student_w[ - :, start_dim * n_units_each_channel : total_dim * n_units_each_channel + :, start_dim * n_units_each_channel: total_dim * n_units_each_channel ], ), axis=1, @@ -225,7 +230,8 @@ def add_noise(weights, other_weights): ''' w_range = np.ptp(other_weights.flatten()) noise_range = NOISE_RATIO * w_range - noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape) + noise = np.random.uniform(-noise_range / 2.0, + noise_range / 2.0, weights.shape) return np.add(noise, weights) @@ -236,7 +242,8 @@ def 
init_dense_weight(layer): weight = np.eye(units) bias = np.zeros(units) layer.set_weights( - (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) + (add_noise(weight, np.array([0, 1])), + add_noise(bias, np.array([0, 1]))) ) @@ -256,7 +263,8 @@ def init_conv_weight(layer): bias = np.zeros(n_filters) layer.set_weights( - (add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) + (add_noise(weight, np.array([0, 1])), + add_noise(bias, np.array([0, 1]))) ) diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/layers.py b/src/sdk/pynni/nni/networkmorphism_tuner/layers.py index e2f0ac484c..d9c2e5d99e 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/layers.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/layers.py @@ -28,8 +28,10 @@ class AvgPool(nn.Module): - '''AvgPool Module. - ''' + """ + AvgPool Module. + """ + def __init__(self): super().__init__() @@ -39,8 +41,10 @@ def forward(self, input_tensor): class GlobalAvgPool1d(AvgPool): - '''GlobalAvgPool1d Module. - ''' + """ + GlobalAvgPool1d Module. + """ + def forward(self, input_tensor): return functional.avg_pool1d(input_tensor, input_tensor.size()[2:]).view( input_tensor.size()[:2] @@ -48,8 +52,10 @@ def forward(self, input_tensor): class GlobalAvgPool2d(AvgPool): - '''GlobalAvgPool2d Module. - ''' + """ + GlobalAvgPool2d Module. + """ + def forward(self, input_tensor): return functional.avg_pool2d(input_tensor, input_tensor.size()[2:]).view( input_tensor.size()[:2] @@ -57,8 +63,10 @@ def forward(self, input_tensor): class GlobalAvgPool3d(AvgPool): - '''GlobalAvgPool3d Module. - ''' + """ + GlobalAvgPool3d Module. + """ + def forward(self, input_tensor): return functional.avg_pool3d(input_tensor, input_tensor.size()[2:]).view( input_tensor.size()[:2] @@ -66,70 +74,86 @@ def forward(self, input_tensor): class StubLayer: - '''StubLayer Module. Base Module. - ''' + """ + StubLayer Module. Base Module. + """ + def __init__(self, input_node=None, output_node=None): self.input = input_node self.output = output_node self.weights = None def build(self, shape): - '''build shape. - ''' + """ + build shape. + """ def set_weights(self, weights): - '''set weights. - ''' + """ + set weights. + """ self.weights = weights def import_weights(self, torch_layer): - '''import weights. - ''' + """ + import weights. + """ def import_weights_keras(self, keras_layer): - '''import weights from keras layer. - ''' + """ + import weights from keras layer. + """ def export_weights(self, torch_layer): - '''export weights. - ''' + """ + export weights. + """ def export_weights_keras(self, keras_layer): - '''export weights to keras layer. - ''' + """ + export weights to keras layer. + """ def get_weights(self): - '''get weights. - ''' + """ + get weights. + """ return self.weights def size(self): - '''size(). - ''' + """ + size(). + """ return 0 @property def output_shape(self): - '''output shape. - ''' + """ + output shape. + """ return self.input.shape def to_real_layer(self): - '''to real layer. - ''' + """ + to real layer. + """ def __str__(self): - '''str() function to print. - ''' + """ + str() function to print. + """ return type(self).__name__[4:] class StubWeightBiasLayer(StubLayer): - '''StubWeightBiasLayer Module to set the bias. - ''' + """ + StubWeightBiasLayer Module to set the bias. 
+ """ + def import_weights(self, torch_layer): self.set_weights( - (torch_layer.weight.data.cpu().numpy(), torch_layer.bias.data.cpu().numpy()) + (torch_layer.weight.data.cpu().numpy(), + torch_layer.bias.data.cpu().numpy()) ) def import_weights_keras(self, keras_layer): @@ -144,8 +168,10 @@ def export_weights_keras(self, keras_layer): class StubBatchNormalization(StubWeightBiasLayer): - '''StubBatchNormalization Module. Batch Norm. - ''' + """ + StubBatchNormalization Module. Batch Norm. + """ + def __init__(self, num_features, input_node=None, output_node=None): super().__init__(input_node, output_node) self.num_features = num_features @@ -175,29 +201,37 @@ def to_real_layer(self): class StubBatchNormalization1d(StubBatchNormalization): - '''StubBatchNormalization1d Module. - ''' + """ + StubBatchNormalization1d Module. + """ + def to_real_layer(self): return torch.nn.BatchNorm1d(self.num_features) class StubBatchNormalization2d(StubBatchNormalization): - '''StubBatchNormalization2d Module. - ''' + """ + StubBatchNormalization2d Module. + """ + def to_real_layer(self): return torch.nn.BatchNorm2d(self.num_features) class StubBatchNormalization3d(StubBatchNormalization): - '''StubBatchNormalization3d Module. - ''' + """ + StubBatchNormalization3d Module. + """ + def to_real_layer(self): return torch.nn.BatchNorm3d(self.num_features) class StubDense(StubWeightBiasLayer): - '''StubDense Module. Linear. - ''' + """ + StubDense Module. Linear. + """ + def __init__(self, input_units, units, input_node=None, output_node=None): super().__init__(input_node, output_node) self.input_units = input_units @@ -208,7 +242,9 @@ def output_shape(self): return (self.units,) def import_weights_keras(self, keras_layer): - self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1])) + self.set_weights( + (keras_layer.get_weights()[0].T, + keras_layer.get_weights()[1])) def export_weights_keras(self, keras_layer): keras_layer.set_weights((self.weights[0].T, self.weights[1])) @@ -221,9 +257,12 @@ def to_real_layer(self): class StubConv(StubWeightBiasLayer): - '''StubConv Module. Conv. - ''' - def __init__(self, input_channel, filters, kernel_size, stride=1, input_node=None, output_node=None): + """ + StubConv Module. Conv. + """ + + def __init__(self, input_channel, filters, kernel_size, + stride=1, input_node=None, output_node=None): super().__init__(input_node, output_node) self.input_channel = input_channel self.filters = filters @@ -242,13 +281,16 @@ def output_shape(self): return tuple(ret) def import_weights_keras(self, keras_layer): - self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1])) + self.set_weights( + (keras_layer.get_weights()[0].T, + keras_layer.get_weights()[1])) def export_weights_keras(self, keras_layer): keras_layer.set_weights((self.weights[0].T, self.weights[1])) def size(self): - return (self.input_channel * self.kernel_size * self.kernel_size + 1) * self.filters + return (self.input_channel * self.kernel_size * + self.kernel_size + 1) * self.filters @abstractmethod def to_real_layer(self): @@ -272,8 +314,10 @@ def __str__(self): class StubConv1d(StubConv): - '''StubConv1d Module. - ''' + """ + StubConv1d Module. + """ + def to_real_layer(self): return torch.nn.Conv1d( self.input_channel, @@ -285,8 +329,10 @@ def to_real_layer(self): class StubConv2d(StubConv): - '''StubConv2d Module. - ''' + """ + StubConv2d Module. 
+ """ + def to_real_layer(self): return torch.nn.Conv2d( self.input_channel, @@ -298,8 +344,10 @@ def to_real_layer(self): class StubConv3d(StubConv): - '''StubConv3d Module. - ''' + """ + StubConv3d Module. + """ + def to_real_layer(self): return torch.nn.Conv3d( self.input_channel, @@ -311,8 +359,10 @@ def to_real_layer(self): class StubAggregateLayer(StubLayer): - '''StubAggregateLayer Module. - ''' + """ + StubAggregateLayer Module. + """ + def __init__(self, input_nodes=None, output_node=None): if input_nodes is None: input_nodes = [] @@ -320,8 +370,8 @@ def __init__(self, input_nodes=None, output_node=None): class StubConcatenate(StubAggregateLayer): - '''StubConcatenate Module. - ''' + """StubConcatenate Module. + """ @property def output_shape(self): ret = 0 @@ -335,8 +385,9 @@ def to_real_layer(self): class StubAdd(StubAggregateLayer): - '''StubAdd Module. - ''' + """ + StubAdd Module. + """ @property def output_shape(self): return self.input[0].shape @@ -346,8 +397,9 @@ def to_real_layer(self): class StubFlatten(StubLayer): - '''StubFlatten Module. - ''' + """ + StubFlatten Module. + """ @property def output_shape(self): ret = 1 @@ -360,22 +412,28 @@ def to_real_layer(self): class StubReLU(StubLayer): - '''StubReLU Module. - ''' + """ + StubReLU Module. + """ + def to_real_layer(self): return torch.nn.ReLU() class StubSoftmax(StubLayer): - '''StubSoftmax Module. - ''' + """ + StubSoftmax Module. + """ + def to_real_layer(self): return torch.nn.LogSoftmax(dim=1) class StubDropout(StubLayer): - '''StubDropout Module. - ''' + """ + StubDropout Module. + """ + def __init__(self, rate, input_node=None, output_node=None): super().__init__(input_node, output_node) self.rate = rate @@ -386,36 +444,45 @@ def to_real_layer(self): class StubDropout1d(StubDropout): - '''StubDropout1d Module. - ''' + """ + StubDropout1d Module. + """ + def to_real_layer(self): return torch.nn.Dropout(self.rate) class StubDropout2d(StubDropout): - '''StubDropout2d Module. - ''' + """ + StubDropout2d Module. + """ + def to_real_layer(self): return torch.nn.Dropout2d(self.rate) class StubDropout3d(StubDropout): - '''StubDropout3d Module. - ''' + """ + StubDropout3d Module. + """ + def to_real_layer(self): return torch.nn.Dropout3d(self.rate) class StubInput(StubLayer): - '''StubInput Module. - ''' + """ + StubInput Module. + """ + def __init__(self, input_node=None, output_node=None): super().__init__(input_node, output_node) class StubPooling(StubLayer): - '''StubPooling Module. - ''' + """ + StubPooling Module. + """ def __init__(self, kernel_size=None, @@ -444,30 +511,37 @@ def to_real_layer(self): class StubPooling1d(StubPooling): - '''StubPooling1d Module. - ''' + """ + StubPooling1d Module. + """ def to_real_layer(self): return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride) class StubPooling2d(StubPooling): - '''StubPooling2d Module. - ''' + """ + StubPooling2d Module. + """ + def to_real_layer(self): return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride) class StubPooling3d(StubPooling): - '''StubPooling3d Module. - ''' + """ + StubPooling3d Module. + """ + def to_real_layer(self): return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride) class StubGlobalPooling(StubLayer): - '''StubGlobalPooling Module. - ''' + """ + StubGlobalPooling Module. + """ + def __init__(self, input_node=None, output_node=None): super().__init__(input_node, output_node) @@ -481,49 +555,63 @@ def to_real_layer(self): class StubGlobalPooling1d(StubGlobalPooling): - '''StubGlobalPooling1d Module. 
- ''' + """ + StubGlobalPooling1d Module. + """ + def to_real_layer(self): return GlobalAvgPool1d() class StubGlobalPooling2d(StubGlobalPooling): - '''StubGlobalPooling2d Module. - ''' + """ + StubGlobalPooling2d Module. + """ + def to_real_layer(self): return GlobalAvgPool2d() class StubGlobalPooling3d(StubGlobalPooling): - '''StubGlobalPooling3d Module. - ''' + """ + StubGlobalPooling3d Module. + """ + def to_real_layer(self): return GlobalAvgPool3d() class TorchConcatenate(nn.Module): - '''TorchConcatenate Module. - ''' + """ + TorchConcatenate Module. + """ + def forward(self, input_list): return torch.cat(input_list, dim=1) class TorchAdd(nn.Module): - '''TorchAdd Module. - ''' + """ + TorchAdd Module. + """ + def forward(self, input_list): return input_list[0] + input_list[1] class TorchFlatten(nn.Module): - '''TorchFlatten Module. - ''' + """ + TorchFlatten Module. + """ + def forward(self, input_tensor): return input_tensor.view(input_tensor.size(0), -1) + def keras_dropout(layer, rate): - '''keras dropout layer. - ''' + """ + Keras dropout layer. + """ from keras import layers @@ -539,8 +627,9 @@ def keras_dropout(layer, rate): def to_real_keras_layer(layer): - ''' real keras layer. - ''' + """ + Real keras layer. + """ from keras import layers if is_layer(layer, "Dense"): @@ -574,10 +663,14 @@ def to_real_keras_layer(layer): def is_layer(layer, layer_type): - '''judge the layer type. - Returns: + """ + Judge the layer type. + + Returns + ------- + bool boolean -- True or False - ''' + """ if layer_type == "Input": return isinstance(layer, StubInput) @@ -607,8 +700,9 @@ def is_layer(layer, layer_type): def layer_description_extractor(layer, node_to_id): - '''get layer description. - ''' + """ + Get layer description. + """ layer_input = layer.input layer_output = layer.output @@ -641,7 +735,8 @@ def layer_description_extractor(layer, node_to_id): layer.units, ] elif isinstance(layer, (StubBatchNormalization,)): - return (type(layer).__name__, layer_input, layer_output, layer.num_features) + return (type(layer).__name__, layer_input, + layer_output, layer.num_features) elif isinstance(layer, (StubDropout,)): return (type(layer).__name__, layer_input, layer_output, layer.rate) elif isinstance(layer, StubPooling): @@ -658,8 +753,8 @@ def layer_description_extractor(layer, node_to_id): def layer_description_builder(layer_information, id_to_node): - '''build layer from description. - ''' + """build layer from description. + """ layer_type = layer_information[0] layer_input_ids = layer_information[1] @@ -696,8 +791,9 @@ def layer_description_builder(layer_information, id_to_node): def layer_width(layer): - '''get layer width. - ''' + """ + Get layer width. + """ if is_layer(layer, "Dense"): return layer.units diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py b/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py index 893e718041..a5bdec98cb 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/networkmorphism_tuner.py @@ -17,11 +17,13 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT # OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# ================================================================================================== +""" +networkmorphism_tuner.py +""" import logging import os - from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward from nni.networkmorphism_tuner.bayesian import BayesianOptimizer @@ -34,7 +36,35 @@ class NetworkMorphismTuner(Tuner): - """NetworkMorphismTuner is a tuner which using network morphism techniques.""" + """ + NetworkMorphismTuner is a tuner which uses network morphism techniques. + + Attributes + ---------- + n_classes : int + The class number or output node number (default: ``10``) + input_shape : tuple + A tuple including: (input_width, input_width, input_channel) + t_min : float + The minimum temperature for simulated annealing. (default: ``Constant.T_MIN``) + beta : float + The beta in acquisition function. (default: ``Constant.BETA``) + algorithm_name : str + algorithm name used in the network morphism (default: ``"Bayesian"``) + optimize_mode : str + optimize mode "minimize" or "maximize" (default: ``"minimize"``) + verbose : bool + verbose to print the log (default: ``True``) + bo : BayesianOptimizer + The optimizer used in networkmorphism tuner. + max_model_size : int + max model size to the graph (default: ``Constant.MAX_MODEL_SIZE``) + default_model_len : int + default model length (default: ``Constant.MODEL_LEN``) + default_model_width : int + default model width (default: ``Constant.MODEL_WIDTH``) + search_space : dict + """ def __init__( self, @@ -52,36 +82,8 @@ def __init__( default_model_len=Constant.MODEL_LEN, default_model_width=Constant.MODEL_WIDTH, ): - """ initilizer of the NetworkMorphismTuner. - - Parameters - ---------- - task : str - task mode, such as "cv","common" etc. (default: {"cv"}) - input_width : int - input sample shape (default: {32}) - input_channel : int - input sample shape (default: {3}) - n_output_node : int - output node number (default: {10}) - algorithm_name : str - algorithm name used in the network morphism (default: {"Bayesian"}) - optimize_mode : str - optimize mode "minimize" or "maximize" (default: {"minimize"}) - path : str - default mode path to save the model file (default: {"model_path"}) - verbose : bool - verbose to print the log (default: {True}) - beta : float - The beta in acquisition function. (default: {Constant.BETA}) - t_min : float - The minimum temperature for simulated annealing. (default: {Constant.T_MIN}) - max_model_size : int - max model size to the graph (default: {Constant.MAX_MODEL_SIZE}) - default_model_len : int - default model length (default: {Constant.MODEL_LEN}) - default_model_width : int - default model width (default: {Constant.MODEL_WIDTH}) + """ + Initializer of the NetworkMorphismTuner.
""" if not os.path.exists(path): @@ -92,7 +94,8 @@ def __init__( elif task == "common": self.generators = [MlpGenerator] else: - raise NotImplementedError('{} task not supported in List ["cv","common"]') + raise NotImplementedError( + '{} task not supported in List ["cv","common"]') self.n_classes = n_output_node self.input_shape = (input_width, input_width, input_channel) @@ -106,7 +109,8 @@ def __init__( self.verbose = verbose self.model_count = 0 - self.bo = BayesianOptimizer(self, self.t_min, self.optimize_mode, self.beta) + self.bo = BayesianOptimizer( + self, self.t_min, self.optimize_mode, self.beta) self.training_queue = [] self.descriptors = [] self.history = [] @@ -117,6 +121,7 @@ def __init__( self.search_space = dict() + def update_search_space(self, search_space): """ Update search space definition in tuner by search_space in neural architecture. @@ -140,7 +145,8 @@ def generate_parameters(self, parameter_id, **kwargs): new_father_id, generated_graph = self.generate() new_model_id = self.model_count self.model_count += 1 - self.training_queue.append((generated_graph, new_father_id, new_model_id)) + self.training_queue.append( + (generated_graph, new_father_id, new_model_id)) self.descriptors.append(generated_graph.extract_descriptor()) graph, father_id, model_id = self.training_queue.pop(0) @@ -153,12 +159,15 @@ def generate_parameters(self, parameter_id, **kwargs): return json_out def receive_trial_result(self, parameter_id, parameters, value, **kwargs): - """ Record an observation of the objective function. + """ + Record an observation of the objective function. Parameters ---------- parameter_id : int + the id of a group of paramters that generated by nni manager. parameters : dict + A group of parameters. value : dict/float if value is dict, it should have "default" key. """ @@ -175,8 +184,11 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs): self.add_model(reward, model_id) self.update(father_id, graph, reward, model_id) + def init_search(self): - """Call the generators to generate the initial architectures for the search.""" + """ + Call the generators to generate the initial architectures for the search. + """ if self.verbose: logger.info("Initializing search.") for generator in self.generators: @@ -191,14 +203,16 @@ def init_search(self): if self.verbose: logger.info("Initialization finished.") + def generate(self): - """Generate the next neural architecture. + """ + Generate the next neural architecture. Returns ------- - other_info: any object + other_info : any object Anything to be saved in the training queue together with the architecture. - generated_graph: Graph + generated_graph : Graph An instance of Graph. """ generated_graph, new_father_id = self.bo.generate(self.descriptors) @@ -211,7 +225,8 @@ def generate(self): return new_father_id, generated_graph def update(self, other_info, graph, metric_value, model_id): - """ Update the controller with evaluation result of a neural architecture. + """ + Update the controller with evaluation result of a neural architecture. 
Parameters ---------- @@ -228,7 +243,8 @@ def update(self, other_info, graph, metric_value, model_id): self.bo.add_child(father_id, model_id) def add_model(self, metric_value, model_id): - """ Add model to the history, x_queue and y_queue + """ + Add model to the history, x_queue and y_queue Parameters ---------- @@ -252,16 +268,21 @@ def add_model(self, metric_value, model_id): file.close() return ret + def get_best_model_id(self): - """ Get the best model_id from history using the metric value + """ + Get the best model_id from history using the metric value """ if self.optimize_mode is OptimizeMode.Maximize: - return max(self.history, key=lambda x: x["metric_value"])["model_id"] + return max(self.history, key=lambda x: x["metric_value"])[ + "model_id"] return min(self.history, key=lambda x: x["metric_value"])["model_id"] + def load_model_by_id(self, model_id): - """Get the model by model_id + """ + Get the model by model_id Parameters ---------- @@ -281,7 +302,8 @@ def load_model_by_id(self, model_id): return load_model def load_best_model(self): - """ Get the best model by model id + """ + Get the best model by model id Returns ------- @@ -291,7 +313,8 @@ def load_best_model(self): return self.load_model_by_id(self.get_best_model_id()) def get_metric_value_by_id(self, model_id): - """ Get the model metric valud by its model_id + """ + Get the model metric valud by its model_id Parameters ---------- diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/nn.py b/src/sdk/pynni/nni/networkmorphism_tuner/nn.py index 363c06be5a..2e820ab2c2 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/nn.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/nn.py @@ -92,17 +92,25 @@ def generate(self, model_len=None, model_width=None): for i in range(model_len): output_node_id = graph.add_layer(StubReLU(), output_node_id) output_node_id = graph.add_layer( - self.batch_norm(graph.node_list[output_node_id].shape[-1]), output_node_id + self.batch_norm( + graph.node_list[output_node_id].shape[-1]), output_node_id ) output_node_id = graph.add_layer( - self.conv(temp_input_channel, model_width, kernel_size=3, stride=stride), + self.conv( + temp_input_channel, + model_width, + kernel_size=3, + stride=stride), output_node_id, ) temp_input_channel = model_width - if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1): - output_node_id = graph.add_layer(self.pooling(), output_node_id) + if pooling_len == 0 or ( + (i + 1) % pooling_len == 0 and i != model_len - 1): + output_node_id = graph.add_layer( + self.pooling(), output_node_id) - output_node_id = graph.add_layer(self.global_avg_pooling(), output_node_id) + output_node_id = graph.add_layer( + self.global_avg_pooling(), output_node_id) output_node_id = graph.add_layer( self.dropout(Constant.CONV_DROPOUT_RATE), output_node_id ) @@ -111,7 +119,11 @@ def generate(self, model_len=None, model_width=None): output_node_id, ) output_node_id = graph.add_layer(StubReLU(), output_node_id) - graph.add_layer(StubDense(model_width, self.n_output_node), output_node_id) + graph.add_layer( + StubDense( + model_width, + self.n_output_node), + output_node_id) return graph @@ -145,7 +157,8 @@ def generate(self, model_len=None, model_width=None): if model_width is None: model_width = Constant.MODEL_WIDTH if isinstance(model_width, list) and not len(model_width) == model_len: - raise ValueError("The length of 'model_width' does not match 'model_len'") + raise ValueError( + "The length of 'model_width' does not match 'model_len'") elif isinstance(model_width, int): 
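+ # e.g. (hypothetical values) a scalar model_width of 64 with + # model_len 3 expands to [64, 64, 64]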
model_width = [model_width] * model_len @@ -162,5 +175,9 @@ def generate(self, model_len=None, model_width=None): output_node_id = graph.add_layer(StubReLU(), output_node_id) n_nodes_prev_layer = width - graph.add_layer(StubDense(n_nodes_prev_layer, self.n_output_node), output_node_id) + graph.add_layer( + StubDense( + n_nodes_prev_layer, + self.n_output_node), + output_node_id) return graph diff --git a/src/sdk/pynni/nni/networkmorphism_tuner/test_networkmorphism_tuner.py b/src/sdk/pynni/nni/networkmorphism_tuner/test_networkmorphism_tuner.py index 09bbe820a9..5da56c487f 100644 --- a/src/sdk/pynni/nni/networkmorphism_tuner/test_networkmorphism_tuner.py +++ b/src/sdk/pynni/nni/networkmorphism_tuner/test_networkmorphism_tuner.py @@ -59,9 +59,12 @@ def test_graph_json_transform(self): graph_recover.layer_id_to_input_node_ids, ) self.assertEqual(graph_init.adj_list, graph_recover.adj_list) - self.assertEqual(graph_init.reverse_adj_list, graph_recover.reverse_adj_list) self.assertEqual( - len(graph_init.operation_history), len(graph_recover.operation_history) + graph_init.reverse_adj_list, + graph_recover.reverse_adj_list) + self.assertEqual( + len(graph_init.operation_history), len( + graph_recover.operation_history) ) self.assertEqual(graph_init.n_dim, graph_recover.n_dim) self.assertEqual(graph_init.conv, graph_recover.conv) @@ -71,7 +74,8 @@ def test_graph_json_transform(self): node_list_init = [node.shape for node in graph_init.node_list] node_list_recover = [node.shape for node in graph_recover.node_list] self.assertEqual(node_list_init, node_list_recover) - self.assertEqual(len(graph_init.node_to_id), len(graph_recover.node_to_id)) + self.assertEqual(len(graph_init.node_to_id), + len(graph_recover.node_to_id)) layer_list_init = [ layer_description_extractor(item, graph_init.node_to_id) for item in graph_init.layer_list @@ -82,7 +86,8 @@ def test_graph_json_transform(self): ] self.assertEqual(layer_list_init, layer_list_recover) - node_to_id_init = [graph_init.node_to_id[node] for node in graph_init.node_list] + node_to_id_init = [graph_init.node_to_id[node] + for node in graph_init.node_list] node_to_id_recover = [ graph_recover.node_to_id[node] for node in graph_recover.node_list ] @@ -192,8 +197,8 @@ def test_get_best_model_id(self): """ tuner = NetworkMorphismTuner() - tuner.add_model(0.8, 0) - tuner.add_model(0.9, 1) + tuner.add_model(0.8, 0) + tuner.add_model(0.9, 1) self.assertEqual(tuner.get_best_model_id(), 1) From 9dec51e2ba04030d0f092ecc2ad61fab9bc331a6 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Mon, 11 Nov 2019 11:19:25 +0800 Subject: [PATCH 04/14] Support space in logDir (#1694) --- .../common/experimentStartupInfo.ts | 2 +- .../training_service/common/util.ts | 28 +++++++++---------- .../local/localTrainingService.ts | 12 ++++---- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/nni_manager/common/experimentStartupInfo.ts b/src/nni_manager/common/experimentStartupInfo.ts index 5675facdde..ba8bd8789b 100644 --- a/src/nni_manager/common/experimentStartupInfo.ts +++ b/src/nni_manager/common/experimentStartupInfo.ts @@ -43,7 +43,7 @@ class ExperimentStartupInfo { this.initialized = true; if (logDir !== undefined && logDir.length > 0) { - this.logDir = path.join(logDir, getExperimentId()); + this.logDir = path.join(path.normalize(logDir), getExperimentId()); } else { this.logDir = path.join(os.homedir(), 'nni', 'experiments', getExperimentId()); } diff --git a/src/nni_manager/training_service/common/util.ts 
b/src/nni_manager/training_service/common/util.ts index ef05ac57b3..294728ee6d 100644 --- a/src/nni_manager/training_service/common/util.ts +++ b/src/nni_manager/training_service/common/util.ts @@ -70,11 +70,11 @@ export async function validateCodeDir(codeDir: string) : Promise { */ export async function execMkdir(directory: string, share: boolean = false): Promise { if (process.platform === 'win32') { - await cpp.exec(`powershell.exe New-Item -Path ${directory} -ItemType "directory" -Force`); + await cpp.exec(`powershell.exe New-Item -Path "${directory}" -ItemType "directory" -Force`); } else if (share) { - await cpp.exec(`(umask 0; mkdir -p ${directory})`); + await cpp.exec(`(umask 0; mkdir -p '${directory}')`); } else { - await cpp.exec(`mkdir -p ${directory}`); + await cpp.exec(`mkdir -p '${directory}'`); } return Promise.resolve(); @@ -87,9 +87,9 @@ export async function execMkdir(directory: string, share: boolean = false): Prom */ export async function execCopydir(source: string, destination: string): Promise { if (process.platform === 'win32') { - await cpp.exec(`powershell.exe Copy-Item ${source} -Destination ${destination} -Recurse`); + await cpp.exec(`powershell.exe Copy-Item "${source}" -Destination "${destination}" -Recurse`); } else { - await cpp.exec(`cp -r ${source} ${destination}`); + await cpp.exec(`cp -r '${source}' '${destination}'`); } return Promise.resolve(); @@ -101,9 +101,9 @@ export async function execCopydir(source: string, destination: string): Promise< */ export async function execNewFile(filename: string): Promise { if (process.platform === 'win32') { - await cpp.exec(`powershell.exe New-Item -Path ${filename} -ItemType "file" -Force`); + await cpp.exec(`powershell.exe New-Item -Path "${filename}" -ItemType "file" -Force`); } else { - await cpp.exec(`touch ${filename}`); + await cpp.exec(`touch '${filename}'`); } return Promise.resolve(); @@ -115,9 +115,9 @@ export async function execNewFile(filename: string): Promise { */ export function runScript(filePath: string): cp.ChildProcess { if (process.platform === 'win32') { - return cp.exec(`powershell.exe -ExecutionPolicy Bypass -file ${filePath}`); + return cp.exec(`powershell.exe -ExecutionPolicy Bypass -file "${filePath}"`); } else { - return cp.exec(`bash ${filePath}`); + return cp.exec(`bash '${filePath}'`); } } @@ -128,9 +128,9 @@ export function runScript(filePath: string): cp.ChildProcess { export async function execTail(filePath: string): Promise { let cmdresult: cpp.childProcessPromise.Result; if (process.platform === 'win32') { - cmdresult = await cpp.exec(`powershell.exe Get-Content ${filePath} -Tail 1`); + cmdresult = await cpp.exec(`powershell.exe Get-Content "${filePath}" -Tail 1`); } else { - cmdresult = await cpp.exec(`tail -n 1 ${filePath}`); + cmdresult = await cpp.exec(`tail -n 1 '${filePath}'`); } return Promise.resolve(cmdresult); @@ -142,9 +142,9 @@ export async function execTail(filePath: string): Promise { if (process.platform === 'win32') { - await cpp.exec(`powershell.exe Remove-Item ${directory} -Recurse -Force`); + await cpp.exec(`powershell.exe Remove-Item "${directory}" -Recurse -Force`); } else { - await cpp.exec(`rm -rf ${directory}`); + await cpp.exec(`rm -rf '${directory}'`); } return Promise.resolve(); @@ -173,7 +173,7 @@ export function setEnvironmentVariable(variable: { key: string; value: string }) if (process.platform === 'win32') { return `$env:${variable.key}="${variable.value}"`; } else { - return `export ${variable.key}=${variable.value}`; + return `export 
${variable.key}='${variable.value}'`; } } diff --git a/src/nni_manager/training_service/local/localTrainingService.ts b/src/nni_manager/training_service/local/localTrainingService.ts index 2d4d1a1745..17cfc1fab9 100644 --- a/src/nni_manager/training_service/local/localTrainingService.ts +++ b/src/nni_manager/training_service/local/localTrainingService.ts @@ -490,18 +490,18 @@ class LocalTrainingService implements TrainingService { const script: string[] = []; if (process.platform === 'win32') { script.push( - `cmd.exe /c ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`, + `cmd.exe /c ${localTrialConfig.command} 2>"${path.join(workingDirectory, 'stderr')}"`, `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`, `$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`, - `Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`); + `Write $LASTEXITCODE " " $NOW_DATE | Out-File "${path.join(workingDirectory, '.nni', 'state')}" -NoNewline -encoding utf8`); } else { - script.push(`eval ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`); + script.push(`eval ${localTrialConfig.command} 2>"${path.join(workingDirectory, 'stderr')}"`); if (process.platform === 'darwin') { // https://superuser.com/questions/599072/how-to-get-bash-execution-time-in-milliseconds-under-mac-os-x // Considering the worst case, write 999 to avoid negative duration - script.push(`echo $? \`date +%s999\` >${path.join(workingDirectory, '.nni', 'state')}`); + script.push(`echo $? \`date +%s999\` >'${path.join(workingDirectory, '.nni', 'state')}'`); } else { - script.push(`echo $? \`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`); + script.push(`echo $? \`date +%s%3N\` >'${path.join(workingDirectory, '.nni', 'state')}'`); } } @@ -522,7 +522,7 @@ class LocalTrainingService implements TrainingService { if (process.platform !== 'win32') { runScriptContent.push('#!/bin/bash'); } - runScriptContent.push(`cd ${this.localTrialConfig.codeDir}`); + runScriptContent.push(`cd '${this.localTrialConfig.codeDir}'`); for (const variable of variables) { runScriptContent.push(setEnvironmentVariable(variable)); } From 0e3906aaf5d34684294b64effffecd6afedf4831 Mon Sep 17 00:00:00 2001 From: xuehui Date: Mon, 11 Nov 2019 11:20:07 +0800 Subject: [PATCH 05/14] update gbdt docs (#1720) --- docs/en_US/TrialExample/GbdtExample.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/en_US/TrialExample/GbdtExample.md b/docs/en_US/TrialExample/GbdtExample.md index 22a94f48bb..95ab71d06f 100644 --- a/docs/en_US/TrialExample/GbdtExample.md +++ b/docs/en_US/TrialExample/GbdtExample.md @@ -44,7 +44,15 @@ Given the features and label in train data, we train a GBDT regression model and ## 3. How to run in nni -### 3.1 Prepare your trial code + +### 3.1 Install all the requirments + +``` +pip install lightgbm +pip install pandas +``` + +### 3.2 Prepare your trial code You need to prepare a basic code as following: @@ -86,7 +94,7 @@ if __name__ == '__main__': run(lgb_train, lgb_eval, PARAMS, X_test, y_test) ``` -### 3.2 Prepare your search space. +### 3.3 Prepare your search space. 
+### 3.3 Prepare your search space. If you like to tune `num_leaves`, `learning_rate`, `bagging_fraction` and `bagging_freq`, you could write a [search_space.json](https://github.com/Microsoft/nni/blob/master/examples/trials/auto-gbdt/search_space.json) as follow: ```json @@ -100,7 +108,7 @@ If you like to tune `num_leaves`, `learning_rate`, `bagging_fraction` and `baggi More support variable type you could reference [here](../Tutorial/SearchSpaceSpec.md). -### 3.3 Add SDK of nni into your code. +### 3.4 Add SDK of nni into your code. ```diff +import nni @@ -146,7 +154,7 @@ if __name__ == '__main__': run(lgb_train, lgb_eval, PARAMS, X_test, y_test) ``` -### 3.4 Write a config file and run it. +### 3.5 Write a config file and run it. In the config file, you could set some settings including: From f5803f6869869a610dcaa3dcdf75d6baee02487c Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Mon, 11 Nov 2019 16:48:55 +0800 Subject: [PATCH 06/14] Fix windows pipeline (#1729) --- azure-pipelines.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 14ef0197ca..f6ccae34ef 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -126,18 +126,10 @@ jobs: cd test powershell.exe -file unittest.ps1 displayName: 'unit test' - - script: | - cd test - python naive_test.py - displayName: 'Naive test' - script: | cd test python tuner_test.py displayName: 'Built-in tuners / assessors tests' - - script: | - cd test - python metrics_test.py - displayName: 'Trial job metrics test' - script: | cd test PATH=$HOME/.local/bin:$PATH python3 cli_test.py From 8735fa5836acff2f76fe1d407eb465efb1cf7752 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Mon, 11 Nov 2019 16:49:59 +0800 Subject: [PATCH 07/14] Update trial doc string (#1713) * trial docstring --- src/sdk/pynni/nni/trial.py | 68 ++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/src/sdk/pynni/nni/trial.py b/src/sdk/pynni/nni/trial.py index befec8816e..e0c7cde163 100644 --- a/src/sdk/pynni/nni/trial.py +++ b/src/sdk/pynni/nni/trial.py @@ -43,8 +43,18 @@ def get_next_parameter(): - """Returns a set of (hyper-)paremeters generated by Tuner. - Returns None if no more (hyper-)parameters can be generated by Tuner.""" + """ + Get the hyper parameters generated by tuner. For a multiphase experiment, it returns a new group of hyper + parameters at each call of get_next_parameter. For a non-multiphase (multiPhase is not configured or set to False) + experiment, it returns hyper parameters only on the first call for each trial job, and returns None from the second call on. + This API should be called only once in each trial job of an experiment which is not specified as multiphase. + + Returns + ------- + dict + A dict object containing the hyper parameters generated by tuner; the keys of the dict are defined in + search space. Returns None if no more hyper parameters can be generated by tuner. + """ global _params _params = platform.get_next_parameter() if _params is None: @@ -52,6 +62,15 @@ def get_next_parameter(): return _params['parameters'] def get_current_parameter(tag=None): + """ + Get current hyper parameters generated by tuner. It returns the same group of hyper parameters as the last + call of get_next_parameter returns.
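+ + For example (illustrative values only): if ``nni.get_next_parameter()`` returned ``{'lr': 0.01}``, ``get_current_parameter('lr')`` returns ``0.01``.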
+ + Parameters + ---------- + tag: str + hyper parameter key + """ global _params if _params is None: return None @@ -60,19 +79,51 @@ def get_current_parameter(tag=None): return _params['parameters'][tag] def get_experiment_id(): + """ + Get experiment ID. + + Returns + ------- + str + Identifier of current experiment + """ return _experiment_id def get_trial_id(): + """ + Get trial job ID which is string identifier of a trial job, for example 'MoXrp'. In one experiment, each trial + job has an unique string ID. + + Returns + ------- + str + Identifier of current trial job which is calling this API. + """ return _trial_id def get_sequence_id(): + """ + Get trial job sequence nubmer. A sequence number is an integer value assigned to each trial job base on the + order they are submitted, incremental starting from 0. In one experiment, both trial job ID and sequence number + are unique for each trial job, they are of different data types. + + Returns + ------- + int + Sequence number of current trial job which is calling this API. + """ return _sequence_id _intermediate_seq = 0 def report_intermediate_result(metric): - """Reports intermediate result to Assessor. - metric: serializable object. + """ + Reports intermediate result to NNI. + + Parameters + ---------- + metric: + serializable object. """ global _intermediate_seq assert _params is not None, 'nni.get_next_parameter() needs to be called before report_intermediate_result' @@ -88,8 +139,13 @@ def report_intermediate_result(metric): def report_final_result(metric): - """Reports final result to tuner. - metric: serializable object. + """ + Reports final result to NNI. + + Parameters + ---------- + metric: + serializable object. """ assert _params is not None, 'nni.get_next_parameter() needs to be called before report_final_result' metric = json_tricks.dumps({ From e6df29cf7de01deca38e763d84c506d8fbbf8458 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Mon, 11 Nov 2019 16:50:29 +0800 Subject: [PATCH 08/14] Installation test pipeline (#1709) * nni installation test pipeline --- test/pipelines-it-installation.yml | 45 ++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 test/pipelines-it-installation.yml diff --git a/test/pipelines-it-installation.yml b/test/pipelines-it-installation.yml new file mode 100644 index 0000000000..a4fb28587b --- /dev/null +++ b/test/pipelines-it-installation.yml @@ -0,0 +1,45 @@ +jobs: +- job: 'pip_install_ubuntu_python36' + pool: + vmImage: 'ubuntu-18.04' + strategy: + matrix: + Python36: + PYTHON_VERSION: '3.6' + + steps: + - script: | + python3 -V + python3 -m pip install --upgrade pip setuptools --user + python3 -m pip install --upgrade nni --user + displayName: 'Install nni' + +- job: 'pip_install_macOS_python36' + pool: + vmImage: 'macOS-10.13' + strategy: + matrix: + Python36: + PYTHON_VERSION: '3.6' + + steps: + - script: | + python3 -V + python3 -m pip install --upgrade pip setuptools --user + python3 -m pip install --upgrade nni --user + displayName: 'Install nni' + +- job: 'pip_install_windows_python36' + pool: + vmImage: 'windows-latest' + strategy: + matrix: + Python36: + PYTHON_VERSION: '3.6' + + steps: + - script: | + python -V + python -m pip install --upgrade pip setuptools --user + python -m pip install --upgrade nni --user + displayName: 'Install nni' From 751445d3c01dbe96955e4bfb11f9e4954c8a8b49 Mon Sep 17 00:00:00 2001 From: Guoxin Date: Mon, 11 Nov 2019 17:05:49 +0800 Subject: [PATCH 09/14] docstr/pylint of GP Tuner & 
CurveFitting Assessor & MedianStop Assessor (#1692) # docstr/pylint of GP Tuner & CurveFitting Assessor & MedianStop Assessor --- docs/en_US/sdk_reference.rst | 3 + .../curvefitting_assessor.py | 17 +- .../curvefitting_assessor/curvefunctions.py | 104 +++++----- .../curvefitting_assessor/model_factory.py | 28 +-- src/sdk/pynni/nni/gp_tuner/gp_tuner.py | 106 ++++++----- src/sdk/pynni/nni/gp_tuner/target_space.py | 179 +++++++++++++----- src/sdk/pynni/nni/gp_tuner/util.py | 139 +++++++++++--- .../medianstop_assessor.py | 56 +++--- src/sdk/pynni/nni/tuner.py | 3 +- 9 files changed, 409 insertions(+), 226 deletions(-) diff --git a/docs/en_US/sdk_reference.rst b/docs/en_US/sdk_reference.rst index 7bf274996d..de274fabec 100644 --- a/docs/en_US/sdk_reference.rst +++ b/docs/en_US/sdk_reference.rst @@ -39,6 +39,9 @@ Tuner .. autoclass:: nni.batch_tuner.batch_tuner.BatchTuner :members: +.. autoclass:: nni.gp_tuner.gp_tuner.GPTuner + :members: + Assessor ------------------------ .. autoclass:: nni.assessor.Assessor diff --git a/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py b/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py index 37e51bccd7..cf9e217099 100644 --- a/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py +++ b/src/sdk/pynni/nni/curvefitting_assessor/curvefitting_assessor.py @@ -29,13 +29,13 @@ class CurvefittingAssessor(Assessor): Parameters ---------- - epoch_num: int + epoch_num : int The total number of epoch - optimize_mode: str + optimize_mode : str optimize mode, 'maximize' or 'minimize' - start_step: int + start_step : int only after receiving start_step number of reported intermediate results - threshold: float + threshold : float The threshold that we decide to early stop the worse performance curve. """ def __init__(self, epoch_num=20, optimize_mode='maximize', start_step=6, threshold=0.95, gap=1): @@ -70,9 +70,9 @@ def trial_end(self, trial_job_id, success): Parameters ---------- - trial_job_id: int + trial_job_id : int trial job id - success: bool + success : bool True if succssfully finish the experiment, False otherwise """ if success: @@ -90,9 +90,9 @@ def assess_trial(self, trial_job_id, trial_history): Parameters ---------- - trial_job_id: int + trial_job_id : int trial job id - trial_history: list + trial_history : list The history performance matrix of each trial Returns @@ -105,7 +105,6 @@ def assess_trial(self, trial_job_id, trial_history): Exception unrecognize exception in curvefitting_assessor """ - trial_job_id = trial_job_id self.trial_history = trial_history if not self.set_best_performance: return AssessResult.Good diff --git a/src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py b/src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py index e5972ecff3..575aec2a8f 100644 --- a/src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py +++ b/src/sdk/pynni/nni/curvefitting_assessor/curvefunctions.py @@ -14,7 +14,9 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- +""" +A family of functions used by CurvefittingAssessor +""" import numpy as np all_models = {} @@ -29,10 +31,10 @@ def vap(x, a, b, c): Parameters ---------- - x: int - a: float - b: float - c: float + x : int + a : float + b : float + c : float Returns ------- @@ -50,10 +52,10 @@ def pow3(x, c, a, alpha): Parameters ---------- - x: int - c: float - a: float - alpha: float + x : int + c : float + a : float + alpha : float Returns ------- @@ -71,9 +73,9 @@ def linear(x, a, b): Parameters ---------- - x: int - a: float - b: float + x : int + a : float + b : float Returns ------- @@ -91,9 +93,9 @@ def logx_linear(x, a, b): Parameters ---------- - x: int - a: float - b: float + x : int + a : float + b : float Returns ------- @@ -112,10 +114,10 @@ def dr_hill_zero_background(x, theta, eta, kappa): Parameters ---------- - x: int - theta: float - eta: float - kappa: float + x : int + theta : float + eta : float + kappa : float Returns ------- @@ -133,10 +135,10 @@ def log_power(x, a, b, c): Parameters ---------- - x: int - a: float - b: float - c: float + x : int + a : float + b : float + c : float Returns ------- @@ -154,11 +156,11 @@ def pow4(x, alpha, a, b, c): Parameters ---------- - x: int - alpha: float - a: float - b: float - c: float + x : int + alpha : float + a : float + b : float + c : float Returns ------- @@ -177,11 +179,11 @@ def mmf(x, alpha, beta, kappa, delta): Parameters ---------- - x: int - alpha: float - beta: float - kappa: float - delta: float + x : int + alpha : float + beta : float + kappa : float + delta : float Returns ------- @@ -199,11 +201,11 @@ def exp4(x, c, a, b, alpha): Parameters ---------- - x: int - c: float - a: float - b: float - alpha: float + x : int + c : float + a : float + b : float + alpha : float Returns ------- @@ -221,9 +223,9 @@ def ilog2(x, c, a): Parameters ---------- - x: int - c: float - a: float + x : int + c : float + a : float Returns ------- @@ -242,11 +244,11 @@ def weibull(x, alpha, beta, kappa, delta): Parameters ---------- - x: int - alpha: float - beta: float - kappa: float - delta: float + x : int + alpha : float + beta : float + kappa : float + delta : float Returns ------- @@ -264,11 +266,11 @@ def janoschek(x, a, beta, k, delta): Parameters ---------- - x: int - a: float - beta: float - k: float - delta: float + x : int + a : float + beta : float + k : float + delta : float Returns -------
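For readers who want to see one of these parametric curves in action, the following is a minimal, self-contained sketch of fitting a learning-curve history with `vap`; the functional form `exp(a + b/x + c*ln(x))` is an assumption based on the model's name (the standard vapor-pressure form), and the accuracy numbers are toy values:

```python
# Minimal sketch: fit the 'vap' curve to a toy learning-curve history.
# The functional form below is an assumption, not quoted from this patch.
import numpy as np
from scipy.optimize import curve_fit

def vap(x, a, b, c):
    # assumed standard vapor-pressure form: exp(a + b/x + c*ln(x))
    return np.exp(a + b / x + c * np.log(x))

epochs = np.arange(1, 7)
accs = np.array([0.60, 0.72, 0.78, 0.81, 0.83, 0.84])  # toy history
(a, b, c), _ = curve_fit(vap, epochs, accs, maxfev=10000)
print(vap(20, a, b, c))  # extrapolated accuracy at a later target epoch
```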
diff --git a/src/sdk/pynni/nni/curvefitting_assessor/model_factory.py b/src/sdk/pynni/nni/curvefitting_assessor/model_factory.py index c276928bc4..6df6066dc0 100644 --- a/src/sdk/pynni/nni/curvefitting_assessor/model_factory.py +++ b/src/sdk/pynni/nni/curvefitting_assessor/model_factory.py @@ -40,7 +40,7 @@ class CurveModel: Parameters ---------- - target_pos: int + target_pos : int The point we need to predict """ def __init__(self, target_pos): @@ -120,14 +120,14 @@ def predict_y(self, model, pos): Parameters ---------- - model: string + model : string name of the curve function model - pos: int + pos : int the epoch number of the position you want to predict Returns ------- - int: + int The expected matrix at pos """ if model_para_num[model] == 2: @@ -143,9 +143,9 @@ def f_comb(self, pos, sample): Parameters ---------- - pos: int + pos : int the epoch number of the position you want to predict - sample: list + sample : list sample is a (1 * NUM_OF_FUNCTIONS) matrix, representing {w1, w2, ..., wk} Returns @@ -165,7 +165,7 @@ def normalize_weight(self, samples): Parameters ---------- - samples: list + samples : list a collection of sample, it's a (NUM_OF_INSTANCE * NUM_OF_FUNCTIONS) matrix, representing{{w11, w12, ..., w1k}, {w21, w22, ... w2k}, ...{wk1, wk2,..., wkk}} @@ -187,7 +187,7 @@ def sigma_sq(self, sample): Parameters ---------- - sample: list + sample : list sample is a (1 * NUM_OF_FUNCTIONS) matrix, representing {w1, w2, ..., wk} Returns ------- @@ -206,9 +206,9 @@ def normal_distribution(self, pos, sample): Parameters ---------- - pos: int + pos : int the epoch number of the position you want to predict - sample: list + sample : list sample is a (1 * NUM_OF_FUNCTIONS) matrix, representing {w1, w2, ..., wk} Returns ------- @@ -225,7 +225,7 @@ def likelihood(self, samples): Parameters ---------- - sample: list + samples : list sample is a (1 * NUM_OF_FUNCTIONS) matrix, representing {w1, w2, ..., wk} Returns ------- @@ -244,7 +244,7 @@ def prior(self, samples): Parameters ---------- - samples: list + samples : list a collection of sample, it's a (NUM_OF_INSTANCE * NUM_OF_FUNCTIONS) matrix, representing{{w11, w12, ..., w1k}, {w21, w22, ... w2k}, ...{wk1, wk2,..., wkk}} @@ -267,7 +267,7 @@ def target_distribution(self, samples): Parameters ---------- - samples: list + samples : list a collection of sample, it's a (NUM_OF_INSTANCE * NUM_OF_FUNCTIONS) matrix, representing{{w11, w12, ..., w1k}, {w21, w22, ... w2k}, ...{wk1, wk2,..., wkk}} @@ -322,7 +322,7 @@ def predict(self, trial_history): Parameters ---------- - trial_history: list + trial_history : list The history performance matrix of each trial. Returns diff --git a/src/sdk/pynni/nni/gp_tuner/gp_tuner.py b/src/sdk/pynni/nni/gp_tuner/gp_tuner.py index e2f3b8ee54..22122a6cf2 100644 --- a/src/sdk/pynni/nni/gp_tuner/gp_tuner.py +++ b/src/sdk/pynni/nni/gp_tuner/gp_tuner.py @@ -17,9 +17,11 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -''' -gp_tuner.py -''' +""" +GPTuner is a Bayesian Optimization method where Gaussian Process is used for modeling loss functions. + +See :class:`GPTuner` for details. +""" import warnings import logging @@ -38,18 +40,40 @@ class GPTuner(Tuner): - ''' - GPTuner - ''' + """ + GPTuner is a Bayesian Optimization method where Gaussian Process is used for modeling loss functions. + + Parameters + ---------- + optimize_mode : str + optimize mode, 'maximize' or 'minimize', by default 'maximize' + utility : str + utility function (also called 'acquisition function') to use, which can be 'ei', 'ucb' or 'poi'. By default 'ei'. + kappa : float + value used by utility function 'ucb'. The bigger kappa is, the more exploratory the tuner will be. By default 5. + xi : float + used by utility functions 'ei' and 'poi'. The bigger xi is, the more exploratory the tuner will be. By default 0. + nu : float + used to specify the Matern kernel. The smaller nu, the less smooth the approximated function is. By default 2.5. + alpha : float + Used to specify the Gaussian Process Regressor. Larger values correspond to an increased noise level in the observations. + By default 1e-6. + cold_start_num : int + Number of random explorations to perform before Gaussian Process. By default 10. + selection_num_warm_up : int + Number of random points to evaluate when looking for the point which maximizes the acquisition function.
By default 100000. + selection_num_starting_points : int + Number of times to run L-BFGS-B from a random starting point after the warmup. By default 250. + """ def __init__(self, optimize_mode="maximize", utility='ei', kappa=5, xi=0, nu=2.5, alpha=1e-6, cold_start_num=10, selection_num_warm_up=100000, selection_num_starting_points=250): - self.optimize_mode = OptimizeMode(optimize_mode) + self._optimize_mode = OptimizeMode(optimize_mode) # utility function related - self.utility = utility - self.kappa = kappa - self.xi = xi + self._utility = utility + self._kappa = kappa + self._xi = xi # target space self._space = None @@ -72,30 +96,23 @@ def __init__(self, optimize_mode="maximize", utility='ei', kappa=5, xi=0, nu=2.5 self._selection_num_starting_points = selection_num_starting_points # num of imported data - self.supplement_data_num = 0 + self._supplement_data_num = 0 def update_search_space(self, search_space): - """Update the self.bounds and self.types by the search_space.json + """ + Update the search space with the content of the search_space.json file. - Parameters - ---------- - search_space : dict + Override of the abstract method in :class:`~nni.tuner.Tuner`. """ self._space = TargetSpace(search_space, self._random_state) def generate_parameters(self, parameter_id, **kwargs): - """Generate next parameter for trial - If the number of trial result is lower than cold start number, - gp will first randomly generate some parameters. - Otherwise, choose the parameters by the Gussian Process Model - - Parameters - ---------- - parameter_id : int - - Returns - ------- - result : dict + """ + Method which provides one set of hyper-parameters. + If the number of trial results is lower than cold_start_num, GPTuner will first generate some parameters randomly. + Otherwise, it chooses the parameters via the Gaussian Process model. + + Override of the abstract method in :class:`~nni.tuner.Tuner`. """ if self._space.len() < self._cold_start_num: results = self._space.random_sample() @@ -107,7 +124,7 @@ def generate_parameters(self, parameter_id, **kwargs): self._gp.fit(self._space.params, self._space.target) util = UtilityFunction( - kind=self.utility, kappa=self.kappa, xi=self.xi) + kind=self._utility, kappa=self._kappa, xi=self._xi) results = acq_max( f_acq=util.utility, @@ -124,17 +141,13 @@ def generate_parameters(self, parameter_id, **kwargs): return results def receive_trial_result(self, parameter_id, parameters, value, **kwargs): - """Tuner receive result from trial. - - Parameters - ---------- - parameter_id : int - parameters : dict - value : dict/float - if value is dict, it should have "default" key. + """ + Method invoked when a trial reports its final result. + + Override of the abstract method in :class:`~nni.tuner.Tuner`. """ value = extract_scalar_reward(value) - if self.optimize_mode == OptimizeMode.Minimize: + if self._optimize_mode == OptimizeMode.Minimize: value = -value logger.info("Received trial result.") self._space.register(parameters, value)
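As a concrete illustration of the record format that `import_data` (below) consumes — each record must carry at least a `parameter` dict and a `value` — here is a minimal sketch; the hyperparameter names and values are hypothetical:

```python
# Minimal sketch of the records GPTuner.import_data expects.
# Hyperparameter names ('x', 'y') and values are hypothetical.
data = [
    {"parameter": {"x": 0.5, "y": 3}, "value": 0.92},
    {"parameter": {"x": 0.1, "y": 7}, "value": 0.88},
]
tuner.import_data(data)  # 'tuner' is an already-constructed GPTuner instance
```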
""" _completed_num = 0 for trial_info in data: - logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data)) + logger.info( + "Importing data, current processing progress %s / %s", _completed_num, len(data)) _completed_num += 1 assert "parameter" in trial_info _params = trial_info["parameter"] assert "value" in trial_info _value = trial_info['value'] if not _value: - logger.info("Useless trial data, value is %s, skip this trial data.", _value) + logger.info( + "Useless trial data, value is %s, skip this trial data.", _value) continue - self.supplement_data_num += 1 + self._supplement_data_num += 1 _parameter_id = '_'.join( - ["ImportData", str(self.supplement_data_num)]) + ["ImportData", str(self._supplement_data_num)]) self.receive_trial_result( parameter_id=_parameter_id, parameters=_params, value=_value) logger.info("Successfully import data to GP tuner.") diff --git a/src/sdk/pynni/nni/gp_tuner/target_space.py b/src/sdk/pynni/nni/gp_tuner/target_space.py index eacf515267..5e26869c54 100644 --- a/src/sdk/pynni/nni/gp_tuner/target_space.py +++ b/src/sdk/pynni/nni/gp_tuner/target_space.py @@ -17,39 +17,51 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -''' -target_space.py -''' +""" +Tool class to hold the param-space coordinates (X) and target values (Y). +""" + import numpy as np import nni.parameter_expressions as parameter_expressions def _hashable(params): - """ ensure that an point is hashable by a python dict """ + """ + Transform list params to tuple format. Ensure that an point is hashable by a python dict. + + Parameters + ---------- + params : numpy array + array format of parameters + + Returns + ------- + tuple + tuple format of parameters + """ return tuple(map(float, params)) class TargetSpace(): """ Holds the param-space coordinates (X) and target values (Y) + + Parameters + ---------- + pbounds : dict + Dictionary with parameters names and legal values. + + random_state : int, RandomState, or None + optionally specify a seed for a random number generator, by default None. """ def __init__(self, pbounds, random_state=None): - """ - Parameters - ---------- - pbounds : dict - Dictionary with parameters names as keys and a tuple with minimum - and maximum values. 
- - random_state : int, RandomState, or None - optionally specify a seed for a random number generator - """ - self.random_state = random_state + self._random_state = random_state # Get the name of the parameters self._keys = sorted(pbounds) + # Create an array with parameters bounds self._bounds = np.array( [item[1] for item in sorted(pbounds.items(), key=lambda x: x[0])] ) @@ -71,54 +83,100 @@ def __init__(self, pbounds, random_state=None): self._cache = {} def __contains__(self, params): - ''' + """ check if a parameter is already registered - ''' + + Parameters + ---------- + params : numpy array + + Returns + ------- + bool + True if the parameter is already registered, else False + """ return _hashable(params) in self._cache def len(self): - ''' + """ length of registered params and targets - ''' + + Returns + ------- + int + """ assert len(self._params) == len(self._target) return len(self._target) @property def params(self): - ''' - params: numpy array - ''' + """ + registered parameters + + Returns + ------- + numpy array + """ return self._params @property def target(self): - ''' - target: numpy array - ''' + """ + registered target values + + Returns + ------- + numpy array + """ return self._target @property def dim(self): - ''' - dim: int - length of keys - ''' + """ + dimension of parameters + + Returns + ------- + int + """ return len(self._keys) @property def keys(self): - ''' - keys: numpy array - ''' + """ + keys of parameters + + Returns + ------- + numpy array + """ return self._keys @property def bounds(self): - '''bounds''' + """ + bounds of parameters + + Returns + ------- + numpy array + """ return self._bounds def params_to_array(self, params): - ''' dict to array ''' + """ + dict to array + + Parameters + ---------- + params : dict + dict format of parameters + + Returns + ------- + numpy array + array format of parameters + """ try: assert set(params) == set(self.keys) except AssertionError: @@ -129,11 +187,20 @@ def params_to_array(self, params): return np.asarray([params[key] for key in self.keys]) def array_to_params(self, x): - ''' + """ array to dict, maintaining int type if the parameter is defined as int in search_space.json - ''' + Parameters + ---------- + x : numpy array + array format of parameters + + Returns + ------- + dict + dict format of parameters + """ try: assert len(x) == len(self.keys) except AssertionError: @@ -159,15 +226,15 @@ def register(self, params, target): Parameters ---------- - x : dict + params : dict + parameters - y : float + target : float target function value """ x = self.params_to_array(params) if x in self: - #raise KeyError('Data point {} is not unique'.format(x)) print('Data point {} is not unique'.format(x)) # Insert data into unique dictionary
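To make the `params_to_array`/`array_to_params` round trip concrete, here is a minimal standalone sketch following the sorted-key ordering shown above; the parameter names are hypothetical and the int-restoring step of `array_to_params` is omitted:

```python
# Sketch of the dict <-> array conversion used by TargetSpace.
# Parameter names ('lr', 'units') are hypothetical.
import numpy as np

params = {"lr": 0.01, "units": 64}
keys = sorted(params)                              # TargetSpace sorts its keys
as_array = np.asarray([params[k] for k in keys])   # like params_to_array
as_dict = dict(zip(keys, as_array))                # like array_to_params, sans int restoring
```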
@@ -180,32 +247,43 @@ def random_sample(self): """ Creates a random point within the bounds of the space. + + Returns + ------- + numpy array + one group of parameters + """ params = np.empty(self.dim) for col, _bound in enumerate(self._bounds): if _bound['_type'] == 'choice': params[col] = parameter_expressions.choice( - _bound['_value'], self.random_state) + _bound['_value'], self._random_state) elif _bound['_type'] == 'randint': - params[col] = self.random_state.randint( + params[col] = self._random_state.randint( _bound['_value'][0], _bound['_value'][1], size=1) elif _bound['_type'] == 'uniform': params[col] = parameter_expressions.uniform( - _bound['_value'][0], _bound['_value'][1], self.random_state) + _bound['_value'][0], _bound['_value'][1], self._random_state) elif _bound['_type'] == 'quniform': params[col] = parameter_expressions.quniform( - _bound['_value'][0], _bound['_value'][1], _bound['_value'][2], self.random_state) + _bound['_value'][0], _bound['_value'][1], _bound['_value'][2], self._random_state) elif _bound['_type'] == 'loguniform': params[col] = parameter_expressions.loguniform( - _bound['_value'][0], _bound['_value'][1], self.random_state) + _bound['_value'][0], _bound['_value'][1], self._random_state) elif _bound['_type'] == 'qloguniform': params[col] = parameter_expressions.qloguniform( - _bound['_value'][0], _bound['_value'][1], _bound['_value'][2], self.random_state) + _bound['_value'][0], _bound['_value'][1], _bound['_value'][2], self._random_state) return params def max(self): - """Get maximum target value found and corresponding parametes.""" + """ + Get maximum target value found and its corresponding parameters. + + Returns + ------- + dict + target value and parameters, empty dict if nothing registered + """ try: res = { 'target': self.target.max(), @@ -218,7 +296,14 @@ def max(self): return res def res(self): - """Get all target values found and corresponding parametes.""" + """ + Get all target values found and corresponding parameters. + + Returns + ------- + list + a list of target values and their corresponding parameters + """ params = [dict(zip(self.keys, p)) for p in self.params] return [ diff --git a/src/sdk/pynni/nni/gp_tuner/util.py b/src/sdk/pynni/nni/gp_tuner/util.py index 9dac7ff499..ed30a22aab 100644 --- a/src/sdk/pynni/nni/gp_tuner/util.py +++ b/src/sdk/pynni/nni/gp_tuner/util.py @@ -17,9 +17,9 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -''' -gp_tuner.py -''' +""" +Utility functions and classes for GPTuner +""" import warnings import numpy as np @@ -28,9 +28,21 @@ def _match_val_type(vals, bounds): - ''' - Update values in the array, to match their corresponding type - ''' + """ + Update values in the array to match their corresponding types and make sure the values are legal. + + Parameters + ---------- + vals : numpy array + values of parameters + bounds : numpy array + list of dictionaries which store parameter names and legal values. + + Returns + ------- + vals_new : list + The closest legal values to the original values + """ vals_new = [] for i, bound in enumerate(bounds): @@ -52,32 +64,33 @@ def acq_max(f_acq, gp, y_max, bounds, space, num_warmup, num_starting_points): A function to find the maximum of the acquisition function. It uses a combination of random sampling (cheap) and the 'L-BFGS-B' - optimization method.
First by sampling `n_warmup` (1e5) points at random, - and then running L-BFGS-B from `n_iter` (250) random starting points. + optimization method. First by sampling ``num_warmup`` points at random, + and then running L-BFGS-B from ``num_starting_points`` random starting points. Parameters ---------- - :param f_acq: + f_acq : UtilityFunction.utility The acquisition function object that returns its point-wise value. - :param gp: + gp : GaussianProcessRegressor A gaussian process fitted to the relevant data. - :param y_max: + y_max : float The current maximum known value of the target function. - :param bounds: + bounds : numpy array The variables bounds to limit the search of the acq max. - :param num_warmup: + num_warmup : int number of times to randomly sample the acquisition function - :param num_starting_points: + num_starting_points : int number of times to run scipy.minimize Returns ------- - :return: x_max, The arg max of the acquisition function. + numpy array + The parameter which achieves the max of the acquisition function. """ # Warm up with random points @@ -117,36 +130,70 @@ def acq_max(f_acq, gp, y_max, bounds, space, num_warmup, num_starting_points): class UtilityFunction(): """ - An object to compute the acquisition functions. + A class to compute different acquisition function values. + + Parameters + ---------- + kind : string + specification of utility function to use + kappa : float + parameter used by the 'ucb' acquisition function + xi : float + parameter used by the 'ei' and 'poi' acquisition functions """ def __init__(self, kind, kappa, xi): - """ - If UCB is to be used, a constant kappa is needed. - """ - self.kappa = kappa - - self.xi = xi + self._kappa = kappa + self._xi = xi if kind not in ['ucb', 'ei', 'poi']: err = "The utility function " \ "{} has not been implemented, " \ "please choose one of ucb, ei, or poi.".format(kind) raise NotImplementedError(err) - self.kind = kind + self._kind = kind def utility(self, x, gp, y_max): - '''return utility function''' - if self.kind == 'ucb': - return self._ucb(x, gp, self.kappa) - if self.kind == 'ei': - return self._ei(x, gp, y_max, self.xi) - if self.kind == 'poi': - return self._poi(x, gp, y_max, self.xi) + """ + return the utility value + + Parameters + ---------- + x : numpy array + parameters + gp : GaussianProcessRegressor + y_max : float + maximum target value observed so far + + Returns + ------- + float + the corresponding utility value; None if the utility kind is illegal + """ + if self._kind == 'ucb': + return self._ucb(x, gp, self._kappa) + if self._kind == 'ei': + return self._ei(x, gp, y_max, self._xi) + if self._kind == 'poi': + return self._poi(x, gp, y_max, self._xi) return None @staticmethod def _ucb(x, gp, kappa): + """ + Upper Confidence Bound (UCB) utility function + + Parameters + ---------- + x : numpy array + parameters + gp : GaussianProcessRegressor + kappa : float + + Returns + ------- + float + """ with warnings.catch_warnings(): warnings.simplefilter("ignore") mean, std = gp.predict(x, return_std=True) @@ -155,6 +202,22 @@ def _ucb(x, gp, kappa): @staticmethod def _ei(x, gp, y_max, xi): + """ + Expected Improvement (EI) utility function + + Parameters + ---------- + x : numpy array + parameters + gp : GaussianProcessRegressor + y_max : float + maximum target value observed so far + xi : float + + Returns + ------- + float + """ with warnings.catch_warnings(): warnings.simplefilter("ignore") mean, std = gp.predict(x, return_std=True) @@ -164,6 +227,22 @@ def _ei(x, gp, y_max, xi): @staticmethod
def _poi(x, gp, y_max, xi): + """ + Probability Of Improvement (POI) utility function + + Parameters + ---------- + x : numpy array + parameters + gp : GaussianProcessRegressor + y_max : float + maximum target value observed so far + xi : float + + Returns + ------- + float + """ with warnings.catch_warnings(): warnings.simplefilter("ignore") mean, std = gp.predict(x, return_std=True)
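To make the acquisition formulas above concrete, here is a minimal standalone sketch of the Expected Improvement value computed the standard way (matching the mean/std usage shown in `_ei`); the numeric inputs are toy values:

```python
# Toy computation of the Expected Improvement (EI) acquisition value,
# using the standard EI formula; inputs are made-up posterior statistics.
from scipy.stats import norm

mean, std, y_max, xi = 0.80, 0.10, 0.75, 0.0
z = (mean - y_max - xi) / std
ei = (mean - y_max - xi) * norm.cdf(z) + std * norm.pdf(z)
print(round(ei, 4))  # ~0.0698; larger EI marks a more promising point
```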
diff --git a/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py b/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py index a2543dc539..d2fee423c4 100644 --- a/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py +++ b/src/sdk/pynni/nni/medianstop_assessor/medianstop_assessor.py @@ -27,21 +27,21 @@ class MedianstopAssessor(Assessor): Parameters ---------- - optimize_mode: str + optimize_mode : str optimize mode, 'maximize' or 'minimize' - start_step: int + start_step : int assessment starts only after start_step intermediate results have been reported """ def __init__(self, optimize_mode='maximize', start_step=0): - self.start_step = start_step - self.running_history = dict() - self.completed_avg_history = dict() + self._start_step = start_step + self._running_history = dict() + self._completed_avg_history = dict() if optimize_mode == 'maximize': - self.high_better = True + self._high_better = True elif optimize_mode == 'minimize': - self.high_better = False + self._high_better = False else: - self.high_better = True + self._high_better = True logger.warning('unrecognized optimize_mode %s', optimize_mode) def _update_data(self, trial_job_id, trial_history): @@ -49,35 +49,35 @@ def _update_data(self, trial_job_id, trial_history): Parameters ---------- - trial_job_id: int + trial_job_id : int trial job id - trial_history: list + trial_history : list The history performance matrix of each trial """ - if trial_job_id not in self.running_history: - self.running_history[trial_job_id] = [] - self.running_history[trial_job_id].extend(trial_history[len(self.running_history[trial_job_id]):]) + if trial_job_id not in self._running_history: + self._running_history[trial_job_id] = [] + self._running_history[trial_job_id].extend(trial_history[len(self._running_history[trial_job_id]):]) def trial_end(self, trial_job_id, success): """trial_end Parameters ---------- - trial_job_id: int + trial_job_id : int trial job id - success: bool + success : bool True if the trial successfully finished the experiment, False otherwise """ - if trial_job_id in self.running_history: + if trial_job_id in self._running_history: if success: cnt = 0 history_sum = 0 - self.completed_avg_history[trial_job_id] = [] - for each in self.running_history[trial_job_id]: + self._completed_avg_history[trial_job_id] = [] + for each in self._running_history[trial_job_id]: cnt += 1 history_sum += each - self.completed_avg_history[trial_job_id].append(history_sum / cnt) - self.running_history.pop(trial_job_id) + self._completed_avg_history[trial_job_id].append(history_sum / cnt) + self._running_history.pop(trial_job_id) else: logger.warning('trial_end: trial_job_id does not exist in running_history') @@ -86,9 +86,9 @@ def assess_trial(self, trial_job_id, trial_history): Parameters ---------- - trial_job_id: int + trial_job_id : int trial job id - trial_history: list + trial_history : list The history performance matrix of each trial Returns ------- AssessResult AssessResult.Good or AssessResult.Bad Raises ------ Exception unrecognized exception in medianstop_assessor """ curr_step = len(trial_history) if curr_step < self._start_step: return AssessResult.Good try: num_trial_history = [float(ele) for ele in trial_history] except (TypeError, ValueError) as error: logger.warning('incorrect data type or value:') logger.exception(error) self._update_data(trial_job_id, num_trial_history) - if self.high_better: + if self._high_better: best_history = max(trial_history) else: best_history = min(trial_history) avg_array = [] - for id_ in self.completed_avg_history: - if len(self.completed_avg_history[id_]) >= curr_step: - avg_array.append(self.completed_avg_history[id_][curr_step - 1]) + for id_ in self._completed_avg_history: + if len(self._completed_avg_history[id_]) >= curr_step: + avg_array.append(self._completed_avg_history[id_][curr_step - 1]) if avg_array: avg_array.sort() - if self.high_better: + if self._high_better: median = avg_array[(len(avg_array)-1) // 2] return AssessResult.Bad if best_history < median else AssessResult.Good else: diff --git a/src/sdk/pynni/nni/tuner.py b/src/sdk/pynni/nni/tuner.py index 177232b7ed..a39ed9ff11 100644 --- a/src/sdk/pynni/nni/tuner.py +++ b/src/sdk/pynni/nni/tuner.py @@ -79,7 +79,8 @@ class Tuner(Recoverable): :class:`~nni.smac_tuner.smac_tuner.SMACTuner` :class:`~nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner` :class:`~nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner` - :class:`~nni.metis_tuner.mets_tuner.MetisTuner` + :class:`~nni.metis_tuner.metis_tuner.MetisTuner` + :class:`~nni.gp_tuner.gp_tuner.GPTuner` """ def generate_parameters(self, parameter_id, **kwargs): From b37fbca8619807d96bdc8c313e25d9dcd784b004 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Mon, 11 Nov 2019 19:01:42 +0800 Subject: [PATCH 10/14] Show experiment name in nnictl experiment list (#1726) * show experiment name in nnictl list * remove author name in metadata --- tools/nni_cmd/config_utils.py | 3 ++- tools/nni_cmd/constants.py | 2 +- tools/nni_cmd/launcher.py | 7 ++++--- tools/nni_cmd/nnictl_utils.py | 29 +++++++++++++++++++++-------- 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/tools/nni_cmd/config_utils.py b/tools/nni_cmd/config_utils.py index c7c88bcf3e..670121e95b 100644 --- a/tools/nni_cmd/config_utils.py +++ b/tools/nni_cmd/config_utils.py @@ -72,7 +72,7 @@ def __init__(self): self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment') self.experiments = self.read_file() - def add_experiment(self, expId, port, time, file_name, platform): + def add_experiment(self, expId, port, time, file_name, platform, experiment_name): '''set {key:value} pairs to self.experiment''' self.experiments[expId] = {} self.experiments[expId]['port'] = port @@ -81,6 +81,7 @@ def add_experiment(self, expId, port, time, file_name, platform): self.experiments[expId]['status'] = 'INITIALIZED' self.experiments[expId]['fileName'] = file_name self.experiments[expId]['platform'] = platform + self.experiments[expId]['experimentName'] = experiment_name self.write_file() def update_experiment(self, expId, key, value): diff --git a/tools/nni_cmd/constants.py b/tools/nni_cmd/constants.py index 0777d2db98..419433d0d8 100644 --- a/tools/nni_cmd/constants.py +++ b/tools/nni_cmd/constants.py @@ -66,7 +66,7 @@ '%s\n' \ '----------------------------------------------------------------------------------------\n' -EXPERIMENT_DETAIL_FORMAT = 'Id: %s Status: %s Port: %s Platform: %s StartTime: %s EndTime: %s \n' +EXPERIMENT_DETAIL_FORMAT = 'Id: %s Name: %s Status: %s Port: %s Platform: %s StartTime: %s EndTime: %s\n' EXPERIMENT_MONITOR_INFO = 'Id: %s Status: %s Port: %s Platform: %s \n' \ 'StartTime: %s Duration: %s'
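For a quick sense of what the new `Name` column adds to `nnictl experiment list`, this sketch renders one record with the updated template; all the field values below are made up for illustration:

```python
# Hypothetical record rendered with the new EXPERIMENT_DETAIL_FORMAT.
EXPERIMENT_DETAIL_FORMAT = 'Id: %s    Name: %s    Status: %s    Port: %s    Platform: %s    StartTime: %s    EndTime: %s\n'
print(EXPERIMENT_DETAIL_FORMAT % (
    'GgQN1mtb', 'example-experiment', 'RUNNING', '8080',
    'local', '2019-11-11 10:40:58', 'N/A'))
```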
diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py index f99f8dfe43..1452f343bb 100644 --- a/tools/nni_cmd/launcher.py +++ b/tools/nni_cmd/launcher.py @@ -478,10 +478,11 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen web_ui_url_list = get_local_urls(args.port) nni_config.set_config('webuiUrl', web_ui_url_list) - #save experiment information + # save experiment information nnictl_experiment_config = Experiments() - nnictl_experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name,\ - experiment_config['trainingServicePlatform']) + nnictl_experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name, + experiment_config['trainingServicePlatform'], + experiment_config['experimentName']) print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list))) diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py index 4cadce182d..8045be66d2 100644 --- a/tools/nni_cmd/nnictl_utils.py +++ b/tools/nni_cmd/nnictl_utils.py @@ -99,9 +99,13 @@ def check_experiment_id(args, update=True): print_error('There are multiple experiments, please set the experiment id...') experiment_information = "" for key in running_experiment_list: - experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ - experiment_dict[key]['port'], experiment_dict[key].get('platform'), experiment_dict[key]['startTime'],\ - experiment_dict[key]['endTime'])) + experiment_information += EXPERIMENT_DETAIL_FORMAT % (key, + experiment_dict[key].get('experimentName', 'N/A'), + experiment_dict[key]['status'], + experiment_dict[key]['port'], + experiment_dict[key].get('platform'), + experiment_dict[key]['startTime'], + experiment_dict[key]['endTime']) print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) exit(1) elif not running_experiment_list: @@ -155,9 +159,13 @@ def parse_ids(args): print_error('There are multiple experiments, please set the experiment id...') experiment_information = "" for key in running_experiment_list: - experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ - experiment_dict[key]['port'], experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], \ - experiment_dict[key]['endTime'])) + experiment_information += EXPERIMENT_DETAIL_FORMAT % (key, + experiment_dict[key].get('experimentName', 'N/A'), + experiment_dict[key]['status'], + experiment_dict[key]['port'], + experiment_dict[key].get('platform'), + experiment_dict[key]['startTime'], + experiment_dict[key]['endTime']) print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) exit(1) else: @@ -573,8 +581,13 @@ def experiment_list(args): print_warning('There is no experiment running...\nYou can use \'nnictl experiment list --all\' to list all experiments.') experiment_information = "" for key in experiment_id_list: - experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], experiment_dict[key]['port'],\ - experiment_dict[key].get('platform'), experiment_dict[key]['startTime'], experiment_dict[key]['endTime'])) + experiment_information += EXPERIMENT_DETAIL_FORMAT % (key, + experiment_dict[key].get('experimentName', 'N/A'), + experiment_dict[key]['status'], + experiment_dict[key]['port'], + experiment_dict[key].get('platform'), + experiment_dict[key]['startTime'], + experiment_dict[key]['endTime']) print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) def get_time_interval(time1, time2):
From 7c4e81b555cce29c5d319baf90b0e2d38dc169a4 Mon Sep 17 00:00:00 2001 From: Tang Lang Date: Mon, 11 Nov 2019 19:14:52 +0800 Subject: [PATCH 11/14] fix pruner export (#1727) --- src/sdk/pynni/nni/compression/torch/compressor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sdk/pynni/nni/compression/torch/compressor.py b/src/sdk/pynni/nni/compression/torch/compressor.py index 6a60a29cf0..65d2e90f13 100644 --- a/src/sdk/pynni/nni/compression/torch/compressor.py +++ b/src/sdk/pynni/nni/compression/torch/compressor.py @@ -206,6 +206,8 @@ def export_model(self, model_path, mask_path=None, onnx_path=None, input_shape=N """ assert model_path is not None, 'model_path must be specified' for name, m in self.bound_model.named_modules(): + if name == "": + continue mask = self.mask_dict.get(name) if mask is not None: mask_sum = mask.sum().item() From 81fcff86aa30fb188a66aea2bd682dc1ad08ec16 Mon Sep 17 00:00:00 2001 From: Cjkkkk <656569648@qq.com> Date: Tue, 12 Nov 2019 16:13:44 +0800 Subject: [PATCH 12/14] Api refactor (#1728) api refactor for compression, especially, quantization APIs --- docs/en_US/Compressor/Overview.md | 52 +++++++- .../pynni/nni/compression/torch/compressor.py | 112 +++++++++++++++--- src/sdk/pynni/tests/test_compressor.py | 9 +- 3 files changed, 147 insertions(+), 26 deletions(-) diff --git a/docs/en_US/Compressor/Overview.md b/docs/en_US/Compressor/Overview.md index 5fc8e45c5d..b078d748a6 100644 --- a/docs/en_US/Compressor/Overview.md +++ b/docs/en_US/Compressor/Overview.md @@ -180,12 +180,54 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer): def quantize_weight(self, weight, config, **kwargs): """ - weight is the target weight tensor - config is the selected dict object in config_list for this layer - kwargs contains op, op_types, and op_name - design your quantizer and return new weight + Quantizers should overload this method to quantize weight tensors. + This method is effectively hooked to :meth:`forward` of the model. + + Parameters + ---------- + weight : Tensor + weight that needs to be quantized + config : dict + the configuration for weight quantization """ + + # Put your code to generate `new_weight` here + return new_weight + + def quantize_output(self, output, config, **kwargs): + """ + Quantizers should overload this method to quantize output. + This method is effectively hooked to :meth:`forward` of the model. + + Parameters + ---------- + output : Tensor + output that needs to be quantized + config : dict + the configuration for output quantization + """ + + # Put your code to generate `new_output` here + + return new_output + + def quantize_input(self, *inputs, config, **kwargs): + """ + Quantizers should overload this method to quantize input. + This method is effectively hooked to :meth:`forward` of the model. + + Parameters + ---------- + inputs : Tensor + inputs that need to be quantized + config : dict + the configuration for inputs quantization + """ + + # Put your code to generate `new_input` here + + return new_input # note for pytorch version, there is no sess in input arguments def update_epoch(self, epoch_num, sess): @@ -200,8 +242,6 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer): pass ``` -__[TODO]__ Will add another member function `quantize_layer_output`, as some quantization algorithms also quantize layers' output. - ### Usage of user customized compression algorithm __[TODO]__ ...
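To complement the template above, here is a sketch of how such a custom quantizer would typically be attached, using the `quant_types`-based `config_list` format this patch introduces (mirroring the test added later in this patch; the layer selection and bit widths are illustrative values only):

```python
# Illustrative usage of the quant_types-based config_list API;
# bit widths and op_types are example values, YourQuantizer is the
# template class sketched above.
config_list = [{
    'quant_types': ['weight', 'output'],
    'quant_bits': {'weight': 8, 'output': 8},
    'op_types': ['Conv2d', 'Linear'],
}]
quantizer = YourQuantizer(model, config_list)
quantizer.compress()
```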
diff --git a/src/sdk/pynni/nni/compression/torch/compressor.py b/src/sdk/pynni/nni/compression/torch/compressor.py index 65d2e90f13..bb06524fba 100644 --- a/src/sdk/pynni/nni/compression/torch/compressor.py +++ b/src/sdk/pynni/nni/compression/torch/compressor.py @@ -32,7 +32,23 @@ def __init__(self, model, config_list): """ self.bound_model = model self.config_list = config_list - self.modules_to_compress = [] + self.modules_to_compress = None + + def detect_modules_to_compress(self): + """ + Detect all modules that should be compressed, and save the result in `self.modules_to_compress`. + + The model will be instrumented and user should never edit it after calling this method. + """ + if self.modules_to_compress is None: + self.modules_to_compress = [] + for name, module in self.bound_model.named_modules(): + layer = LayerInfo(name, module) + config = self.select_config(layer) + if config is not None: + self.modules_to_compress.append((layer, config)) + return self.modules_to_compress + def compress(self): """ @@ -41,12 +57,9 @@ The model will be instrumented and user should never edit it after calling this method. `self.modules_to_compress` records all the to-be-compressed layers """ - for name, module in self.bound_model.named_modules(): - layer = LayerInfo(name, module) - config = self.select_config(layer) - if config is not None: - self._instrument_layer(layer, config) - self.modules_to_compress.append((layer, config)) + modules_to_compress = self.detect_modules_to_compress() + for layer, config in modules_to_compress: + self._instrument_layer(layer, config) return self.bound_model def get_modules_to_compress(self): @@ -55,7 +68,7 @@ Returns ------- - self.modules_to_compress : list + list a list of the layers, each of which is a tuple (`layer`, `config`), `layer` is `LayerInfo`, `config` is a `dict` """ @@ -72,7 +85,7 @@ def select_config(self, layer): Returns ------- - ret : config or None + config or None the retrieved configuration for this layer, if None, this layer should not be compressed """ @@ -240,26 +253,87 @@ class Quantizer(Compressor): """ def quantize_weight(self, weight, config, op, op_type, op_name): - """user should know where dequantize goes and implement it in quantize method - we now do not provide dequantize method + """ + Quantizers should overload this method to quantize weight. + This method is effectively hooked to :meth:`forward` of the model. + + Parameters + ---------- + weight : Tensor + weight that needs to be quantized + config : dict + the configuration for weight quantization """ raise NotImplementedError("Quantizer must overload quantize_weight()") + def quantize_output(self, output, config, op, op_type, op_name): + """ + Quantizers should overload this method to quantize output. + This method is effectively hooked to :meth:`forward` of the model. + + Parameters + ---------- + output : Tensor + output that needs to be quantized + config : dict + the configuration for output quantization + """ + raise NotImplementedError("Quantizer must overload quantize_output()") + + def quantize_input(self, *inputs, config, op, op_type, op_name): + """ + Quantizers should overload this method to quantize input. + This method is effectively hooked to :meth:`forward` of the model.
+ + Parameters + ---------- + inputs : Tensor + inputs that need to be quantized + config : dict + the configuration for inputs quantization + """ + raise NotImplementedError("Quantizer must overload quantize_input()") + + def _instrument_layer(self, layer, config): + """ + Create a wrapper forward function to replace the original one. + + Parameters + ---------- + layer : LayerInfo + the layer to instrument + config : dict + the configuration for quantization + """ assert layer._forward is None, 'Each model can only be compressed once' + assert "quant_types" in config, 'must provide quant_types in config' + assert isinstance(config["quant_types"], list), 'quant_types must be list type' + + if 'weight' in config["quant_types"]: + if not _check_weight(layer.module): + _logger.warning('Module %s does not have parameter "weight"', layer.name) - if not _check_weight(layer.module): - _logger.warning('Module %s does not have parameter "weight"', layer.name) - return layer._forward = layer.module.forward def new_forward(*inputs): - weight = layer.module.weight.data - new_weight = self.quantize_weight(weight, config, op=layer.module, op_type=layer.type, op_name=layer.name) - layer.module.weight.data = new_weight - return layer._forward(*inputs) + if 'input' in config["quant_types"]: + inputs = self.quantize_input(inputs, config=config, op=layer.module, op_type=layer.type, op_name=layer.name) + + if 'weight' in config["quant_types"] and _check_weight(layer.module): + weight = layer.module.weight.data + new_weight = self.quantize_weight(weight, config, op=layer.module, op_type=layer.type, op_name=layer.name) + layer.module.weight.data = new_weight + result = layer._forward(*inputs) + layer.module.weight.data = weight + else: + result = layer._forward(*inputs) + + if 'output' in config["quant_types"]: + result = self.quantize_output(result, config, op=layer.module, op_type=layer.type, op_name=layer.name) + return result + + layer.module.forward = new_forward def _check_weight(module): try: diff --git a/src/sdk/pynni/tests/test_compressor.py b/src/sdk/pynni/tests/test_compressor.py index ca8b628640..e4eb0bbe46 100644 --- a/src/sdk/pynni/tests/test_compressor.py +++ b/src/sdk/pynni/tests/test_compressor.py @@ -114,7 +114,14 @@ def test_torch_pruner(self): def test_torch_quantizer(self): model = TorchMnist() - torch_compressor.NaiveQuantizer(model, [{'op_types': ['default']}]).compress() + configure_list = [{ + 'quant_types': ['weight'], + 'quant_bits': { + 'weight': 8, + }, + 'op_types':['Conv2d', 'Linear'] + }] + torch_compressor.NaiveQuantizer(model, configure_list).compress() if __name__ == '__main__': From 926c42dec29c9a70db5f8669aa0ed3ce4d22978c Mon Sep 17 00:00:00 2001 From: QuanluZhang Date: Wed, 13 Nov 2019 08:11:21 +0800 Subject: [PATCH 13/14] improve doc docstring of gridsearch/smac/ppo (#1693) --- docs/en_US/sdk_reference.rst | 7 +- .../pynni/nni/gridsearch_tuner/__init__.py | 1 + .../nni/gridsearch_tuner/gridsearch_tuner.py | 119 ++++++++--- src/sdk/pynni/nni/ppo_tuner/__init__.py | 1 + src/sdk/pynni/nni/ppo_tuner/distri.py | 6 +- src/sdk/pynni/nni/ppo_tuner/model.py | 25 +-- src/sdk/pynni/nni/ppo_tuner/policy.py | 60 ++++-- src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py | 197 ++++++++++++------ src/sdk/pynni/nni/ppo_tuner/util.py | 37 ++-- .../nni/smac_tuner/convert_ss_to_scenario.py | 15 +- src/sdk/pynni/nni/smac_tuner/smac_tuner.py | 109 ++++++---- src/sdk/pynni/nni/tuner.py | 7 +- 12 files changed, 400 insertions(+), 184 deletions(-)
diff --git a/docs/en_US/sdk_reference.rst b/docs/en_US/sdk_reference.rst index de274fabec..6b4d6d8d79 100644 --- a/docs/en_US/sdk_reference.rst +++ b/docs/en_US/sdk_reference.rst @@ -24,10 +24,10 @@ Tuner .. autoclass:: nni.evolution_tuner.evolution_tuner.EvolutionTuner :members: -.. autoclass:: nni.smac_tuner.smac_tuner.SMACTuner +.. autoclass:: nni.smac_tuner.SMACTuner :members: -.. autoclass:: nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner +.. autoclass:: nni.gridsearch_tuner.GridSearchTuner :members: .. autoclass:: nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner @@ -36,6 +36,9 @@ Tuner .. autoclass:: nni.metis_tuner.metis_tuner.MetisTuner :members: +.. autoclass:: nni.ppo_tuner.PPOTuner + :members: + .. autoclass:: nni.batch_tuner.batch_tuner.BatchTuner :members: diff --git a/src/sdk/pynni/nni/gridsearch_tuner/__init__.py b/src/sdk/pynni/nni/gridsearch_tuner/__init__.py index e69de29bb2..705d242e98 100644 --- a/src/sdk/pynni/nni/gridsearch_tuner/__init__.py +++ b/src/sdk/pynni/nni/gridsearch_tuner/__init__.py @@ -0,0 +1 @@ +from .gridsearch_tuner import GridSearchTuner \ No newline at end of file diff --git a/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py b/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py index ebe09b1cf3..7a8cd49dc7 100644 --- a/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py +++ b/src/sdk/pynni/nni/gridsearch_tuner/gridsearch_tuner.py @@ -17,10 +17,10 @@ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -''' +""" gridsearch_tuner.py including: class GridSearchTuner -''' +""" import copy import logging @@ -37,29 +37,40 @@ class GridSearchTuner logger = logging.getLogger('grid_search_AutoML') class GridSearchTuner(Tuner): - ''' + """ GridSearchTuner will search all the possible configurations that the user defines in the search space. - The only acceptable types of search space are 'choice', 'quniform', 'randint' + The only acceptable types of search space are ``choice``, ``quniform``, ``randint`` - Type 'choice' will select one of the options. Note that it can also be nested. + Type ``choice`` will select one of the options. Note that it can also be nested. - Type 'quniform' will receive three values [low, high, q], where [low, high] specifies a range and 'q' specifies the interval - It will be sampled in a way that the first sampled value is 'low', + Type ``quniform`` will receive three values [``low``, ``high``, ``q``], + where [``low``, ``high``] specifies a range and ``q`` specifies the interval. + It will be sampled in a way that the first sampled value is ``low``, and each of the following values is ``q`` larger than the value in front of it. - Type 'randint' gives all possible intergers in range[low, high). Note that 'high' is not included. - ''' + Type ``randint`` gives all possible integers in range [``low``, ``high``). Note that ``high`` is not included. + """ def __init__(self): self.count = -1 self.expanded_search_space = [] self.supplement_data = dict() - def json2parameter(self, ss_spec): - ''' - generate all possible configs for hyperparameters from hyperparameter space. - ss_spec: hyperparameter space - ''' + def _json2parameter(self, ss_spec): + """ + Generate all possible configs for hyperparameters from hyperparameter space.
+ + Parameters + ---------- + ss_spec : dict or list + Hyperparameter space or the ``_value`` of a hyperparameter + + Returns + ------- + list or dict + All the candidate choices of hyperparameters. For a hyperparameter, chosen_params + is a list. For multiple hyperparameters (e.g., search space), chosen_params is a dict. + """ if isinstance(ss_spec, dict): if '_type' in ss_spec.keys(): _type = ss_spec['_type'] @@ -67,7 +78,7 @@ chosen_params = list() if _type == 'choice': for value in _value: - choice = self.json2parameter(value) + choice = self._json2parameter(value) if isinstance(choice, list): chosen_params.extend(choice) else: @@ -81,12 +92,12 @@ else: chosen_params = dict() for key in ss_spec.keys(): - chosen_params[key] = self.json2parameter(ss_spec[key]) - return self.expand_parameters(chosen_params) + chosen_params[key] = self._json2parameter(ss_spec[key]) + return self._expand_parameters(chosen_params) elif isinstance(ss_spec, list): chosen_params = list() for subspec in ss_spec[1:]: - choice = self.json2parameter(subspec) + choice = self._json2parameter(subspec) if isinstance(choice, list): chosen_params.extend(choice) else: @@ -97,27 +108,39 @@ return chosen_params def _parse_quniform(self, param_value): - '''parse type of quniform parameter and return a list''' + """ + Parse type of quniform parameter and return a list + """ low, high, q = param_value[0], param_value[1], param_value[2] return np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high) def _parse_randint(self, param_value): - '''parse type of randint parameter and return a list''' + """ + Parse type of randint parameter and return a list + """ return np.arange(param_value[0], param_value[1]).tolist() - def expand_parameters(self, para): - ''' + def _expand_parameters(self, para): + """ Enumerate all possible combinations of all parameters - para: {key1: [v11, v12, ...], key2: [v21, v22, ...], ...} - return: {{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...} - ''' + + Parameters + ---------- + para : dict + {key1: [v11, v12, ...], key2: [v21, v22, ...], ...} + + Returns + ------- + list + [{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...] + """ if len(para) == 1: for key, values in para.items(): return list(map(lambda v: {key: v}, values)) key = list(para)[0] values = para.pop(key) - rest_para = self.expand_parameters(para) + rest_para = self._expand_parameters(para) ret_para = list() for val in values: for config in rest_para: config[key] = val ret_para.append(copy.deepcopy(config)) return ret_para def update_search_space(self, search_space): - ''' - Check if the search space is valid and expand it: support only 'choice', 'quniform', randint' - ''' - self.expanded_search_space = self.json2parameter(search_space) + """ + Check if the search space is valid and expand it: support only ``choice``, ``quniform``, ``randint``. + + Parameters + ---------- + search_space : dict + For the format, refer to the search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html). + """ + self.expanded_search_space = self._json2parameter(search_space)
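As a tiny worked example of the expansion `_expand_parameters` performs, two candidate lists produce every value combination as one configuration; the parameter names here are hypothetical:

```python
# Sketch of what _expand_parameters does with two candidate lists:
# every combination of values becomes one grid configuration.
para = {"lr": [0.1, 0.01], "optimizer": ["sgd", "adam"]}
# expands to:
# [{'lr': 0.1,  'optimizer': 'sgd'}, {'lr': 0.1,  'optimizer': 'adam'},
#  {'lr': 0.01, 'optimizer': 'sgd'}, {'lr': 0.01, 'optimizer': 'adam'}]
```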
def generate_parameters(self, parameter_id, **kwargs): + """ + Generate parameters for one trial. + + Parameters + ---------- + parameter_id : int + The id for the generated hyperparameter + **kwargs + Not used + + Returns + ------- + dict + One configuration from the expanded search space. + + Raises + ------ + NoMoreTrialError + If all the configurations have been sent, raise :class:`~nni.NoMoreTrialError`. + """ self.count += 1 while self.count <= len(self.expanded_search_space) - 1: _params_tuple = convert_dict2tuple(self.expanded_search_space[self.count]) @@ -142,15 +190,20 @@ raise nni.NoMoreTrialError('no more parameters now.') def receive_trial_result(self, parameter_id, parameters, value, **kwargs): + """ + Receive a trial's final performance result reported through :func:`~nni.report_final_result` by the trial. + GridSearchTuner does not need trial's results. + """ pass def import_data(self, data): - """Import additional data for tuning + """ + Import additional data for tuning Parameters ---------- - data: - a list of dictionarys, each of which has at least two keys, 'parameter' and 'value' + data : list + A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value`` """ _completed_num = 0 for trial_info in data: diff --git a/src/sdk/pynni/nni/ppo_tuner/__init__.py b/src/sdk/pynni/nni/ppo_tuner/__init__.py index e69de29bb2..ada7e57c23 100644 --- a/src/sdk/pynni/nni/ppo_tuner/__init__.py +++ b/src/sdk/pynni/nni/ppo_tuner/__init__.py @@ -0,0 +1 @@ +from .ppo_tuner import PPOTuner diff --git a/src/sdk/pynni/nni/ppo_tuner/distri.py b/src/sdk/pynni/nni/ppo_tuner/distri.py index 5f00843b3e..39f539c870 100644 --- a/src/sdk/pynni/nni/ppo_tuner/distri.py +++ b/src/sdk/pynni/nni/ppo_tuner/distri.py @@ -77,7 +77,7 @@ def sample_placeholder(self, prepend_shape, name=None): class CategoricalPd(Pd): """ - categorical prossibility distribution + Categorical probability distribution """ def __init__(self, logits, mask_npinf, nsteps, size, is_act_model): self.logits = logits @@ -154,7 +154,7 @@ def fromflat(cls, flat): class CategoricalPdType(PdType): """ - to create CategoricalPd + To create CategoricalPd """ def __init__(self, ncat, nsteps, np_mask, is_act_model): self.ncat = ncat @@ -180,7 +180,7 @@ def sample_dtype(self): def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model): """ - add fc op, and add mask op when not in action mode + Add fc op, and add mask op when not in action mode """ if tensor.shape[-1] == size: assert False diff --git a/src/sdk/pynni/nni/ppo_tuner/model.py b/src/sdk/pynni/nni/ppo_tuner/model.py index 330f10369d..e042eec3d9 100644 --- a/src/sdk/pynni/nni/ppo_tuner/model.py +++ b/src/sdk/pynni/nni/ppo_tuner/model.py @@ -28,21 +28,18 @@ class Model: """ We use this object to : - __init__: - - Creates the step_model - - Creates the train_model + __init__: + - Creates the step_model + - Creates the train_model - train(): - - Make the training part (feedforward and retropropagation of gradients) + train(): + - Make the training part (feedforward and backpropagation of gradients) - save/load(): - - Save load the model + save/load(): + - Save load the model """ def __init__(self, *, policy, nbatch_act, nbatch_train, nsteps, ent_coef, vf_coef, max_grad_norm, microbatch_size=None, np_mask=None): - """ - init - """ self.sess = sess = get_session() with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE): @@ -137,9 +134,13 @@ def __init__(self, *, policy, nbatch_act, nbatch_train, def train(self, lr, cliprange, obs, returns, masks, actions, values, neglogpacs, states=None): """ - train the model.
Here we calculate advantage A(s,a) = R + yV(s') - V(s) - Returns = R + yV(s') + Train the model. + + Returns + ------- + obj + the returns, computed as R + yV(s') """ advs = returns - values
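A compact numeric sketch of the advantage computation stated above — since `returns` already holds R + yV(s'), advantages reduce to `returns - values`; all numbers below are toy values:

```python
# Toy illustration of A(s, a) = R + y*V(s') - V(s) as used in train().
import numpy as np

values = np.array([0.50, 0.40])   # V(s) predicted by the value network
returns = np.array([0.65, 0.35])  # R + y*V(s') computed from rollouts
advs = returns - values           # positive -> action better than expected
print(advs)                       # [ 0.15 -0.05]
```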
     """
     policy_network = lstm_model(**policy_kwargs)

diff --git a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
index 4b5009d45d..ef57148d5e 100644
--- a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
+++ b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
@@ -38,8 +38,10 @@ class PPOTuner

 logger = logging.getLogger('ppo_tuner_AutoML')

-def constfn(val):
-    """wrap as function"""
+def _constfn(val):
+    """
+    Wrap as function
+    """
     def f(_):
         return val
     return f
@@ -90,7 +92,7 @@ def __init__(self, obs, actions, values, neglogpacs, dones, last_value, inf_batc

     def get_next(self):
         """
-        get actions of the next trial
+        Get actions of the next trial
         """
         if self.iter >= self.inf_batch_size:
             return None, None
@@ -102,14 +104,14 @@ def get_next(self):

     def update_rewards(self, rewards, returns):
         """
-        after the trial is finished, reward and return of this trial is updated
+        After the trial is finished, its reward and return are updated
         """
         self.rewards = rewards
         self.returns = returns

     def convert_shape(self):
         """
-        convert shape
+        Convert shape
         """
         def sf01(arr):
             """
@@ -138,9 +140,9 @@ def __init__(self, model_config, mask):
         set_global_seeds(None)
         assert isinstance(self.model_config.lr, float)
-        self.lr = constfn(self.model_config.lr)
+        self.lr = _constfn(self.model_config.lr)
         assert isinstance(self.model_config.cliprange, float)
-        self.cliprange = constfn(self.model_config.cliprange)
+        self.cliprange = _constfn(self.model_config.cliprange)

         # build lstm policy network, value share the same network
         policy = build_lstm_policy(model_config)
@@ -165,12 +167,28 @@ def __init__(self, model_config, mask):

     def inference(self, num):
         """
-        generate actions along with related info from policy network.
+        Generate actions along with related info from policy network.
         observation is the action of the last step.

-        Parameters:
+        Parameters
         ----------
-        num: the number of trials to generate
+        num : int
+            The number of trials to generate
+
+        Returns
+        -------
+        mb_obs : list
+            Observation of the ``num`` configurations
+        mb_actions : list
+            Actions of the ``num`` configurations
+        mb_values : list
+            Values from the value function of the ``num`` configurations
+        mb_neglogpacs : list
+            ``neglogp`` of the ``num`` configurations
+        mb_dones : list
+            To show whether the play is done, always ``True``
+        last_values : tensorflow tensor
+            The last values of the ``num`` configurations, obtained with a session run
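+
+        Examples
+        --------
+        A minimal sketch (illustrative only), generating 20 configurations:
+
+        >>> mb_obs, mb_actions, mb_values, mb_neglogpacs, mb_dones, last_values = model.inference(20)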
         """
         # Here, we init the lists that will contain the mb of experiences
         mb_obs, mb_actions, mb_values, mb_dones, mb_neglogpacs = [], [], [], [], []
@@ -212,13 +230,15 @@ def inference(self, num):

     def compute_rewards(self, trials_info, trials_result):
         """
-        compute the rewards of the trials in trials_info based on trials_result,
+        Compute the rewards of the trials in trials_info based on trials_result,
         and update the rewards in trials_info

-        Parameters:
+        Parameters
         ----------
-        trials_info: info of the generated trials
-        trials_result: final results (e.g., acc) of the generated trials
+        trials_info : TrialsInfo
+            Info of the generated trials
+        trials_result : list
+            Final results (e.g., acc) of the generated trials
         """
         mb_rewards = np.asarray([trials_result for _ in trials_info.actions], dtype=np.float32)
         # discount/bootstrap off value fn
@@ -243,12 +263,14 @@ def compute_rewards(self, trials_info, trials_result):

     def train(self, trials_info, nenvs):
         """
-        train the policy/value network using trials_info
+        Train the policy/value network using trials_info

-        Parameters:
+        Parameters
         ----------
-        trials_info: complete info of the generated trials from the previous inference
-        nenvs: the batch size of the (previous) inference
+        trials_info : TrialsInfo
+            Complete info of the generated trials from the previous inference
+        nenvs : int
+            The batch size of the (previous) inference
         """
         # keep frac decay for future optimization
         if self.cur_update <= self.nupdates:
@@ -282,27 +304,40 @@ def train(self, trials_info, nenvs):

 class PPOTuner(Tuner):
     """
-    PPOTuner
+    PPOTuner. The implementation inherits the main logic of
+    [ppo2 from OpenAI](https://github.com/openai/baselines/tree/master/baselines/ppo2) and is adapted for the NAS scenario.
+    It uses ``lstm`` for its policy network and value network; policy and value share the same network.
     """
     def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, minibatch_size=4,
                  ent_coef=0.0, lr=3e-4, vf_coef=0.5, max_grad_norm=0.5, gamma=0.99, lam=0.95, cliprange=0.2):
         """
-        initialization, PPO model is not initialized here as search space is not received yet.
+        Initialization. The PPO model is not initialized here because the search space has not been received yet.

-        Parameters:
+        Parameters
         ----------
-        optimize_mode: maximize or minimize
-        trials_per_update: number of trials to have for each model update
-        epochs_per_update: number of epochs to run for each model update
-        minibatch_size: minibatch size (number of trials) for the update
-        ent_coef: policy entropy coefficient in the optimization objective
-        lr: learning rate of the model (lstm network), constant
-        vf_coef: value function loss coefficient in the optimization objective
-        max_grad_norm: gradient norm clipping coefficient
-        gamma: discounting factor
-        lam: advantage estimation discounting factor (lambda in the paper)
-        cliprange: cliprange in the PPO algorithm, constant
+        optimize_mode : str
+            'maximize' or 'minimize'
+        trials_per_update : int
+            Number of trials to have for each model update
+        epochs_per_update : int
+            Number of epochs to run for each model update
+        minibatch_size : int
+            Minibatch size (number of trials) for the update
+        ent_coef : float
+            Policy entropy coefficient in the optimization objective
+        lr : float
+            Learning rate of the model (lstm network), constant
+        vf_coef : float
+            Value function loss coefficient in the optimization objective
+        max_grad_norm : float
+            Gradient norm clipping coefficient
+        gamma : float
+            Discounting factor
+        lam : float
+            Advantage estimation discounting factor (lambda in the paper)
+        cliprange : float
+            Cliprange in the PPO algorithm, constant
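+
+        Examples
+        --------
+        A minimal sketch of constructing the tuner directly (in an NNI experiment
+        it is normally instantiated from the experiment configuration instead):
+
+        >>> tuner = PPOTuner(optimize_mode='maximize', trials_per_update=20)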
         """
         self.optimize_mode = OptimizeMode(optimize_mode)
         self.model_config = ModelConfig()
@@ -330,21 +365,25 @@ def __init__(self, optimize_mode, trials_per_update=20, epochs_per_update=4, min
         self.model_config.nminibatches = minibatch_size

         self.send_trial_callback = None
-        logger.info('=== finished PPOTuner initialization')
+        logger.info('Finished PPOTuner initialization')

     def _process_one_nas_space(self, block_name, block_space):
         """
-        process nas space to determine observation space and action space
+        Process nas space to determine observation space and action space

-        Parameters:
+        Parameters
         ----------
-        block_name: the name of the mutable block
-        block_space: search space of this mutable block
+        block_name : str
+            The name of the mutable block
+        block_space : dict
+            Search space of this mutable block

-        Returns:
-        ----------
-        actions_spaces: list of the space of each action
-        actions_to_config: the mapping from action to generated configuration
+        Returns
+        -------
+        actions_spaces : list
+            List of the space of each action
+        actions_to_config : list
+            The mapping from action to generated configuration
         """
         actions_spaces = []
         actions_to_config = []
@@ -385,7 +424,7 @@ def _process_one_nas_space(self, block_name, block_space):

     def _process_nas_space(self, search_space):
         """
-        process nas search space to get action/observation space
+        Process nas search space to get action/observation space
         """
         actions_spaces = []
         actions_to_config = []
@@ -412,7 +451,7 @@ def _process_nas_space(self, search_space):

     def _generate_action_mask(self):
         """
-        different step could have different action space. to deal with this case, we merge all the
+        Different steps could have different action spaces. To deal with this case, we merge all the
         possible actions into one action space, and use mask to indicate available actions for each step
         """
         two_masks = []
@@ -439,15 +478,13 @@ def _generate_action_mask(self):

     def update_search_space(self, search_space):
         """
-        get search space, currently the space only includes that for NAS
+        Get search space. Currently the space only includes that for NAS.

-        Parameters:
+        Parameters
         ----------
-        search_space: search space for NAS
-
-        Returns:
-        -------
-        no return
+        search_space : dict
+            Search space for NAS;
+            the format can be found in the search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
         """
         logger.info('=== update search space %s', search_space)
         assert self.search_space is None
@@ -470,7 +507,7 @@ def update_search_space(self, search_space):

     def _actions_to_config(self, actions):
         """
-        given actions, to generate the corresponding trial configuration
+        Given actions, generate the corresponding trial configuration
         """
         chosen_arch = copy.deepcopy(self.chosen_arch_template)
         for cnt, act in enumerate(actions):
@@ -490,6 +527,19 @@ def _actions_to_config(self, actions):

     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
         """
         Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
+
+        Parameters
+        ----------
+        parameter_id_list : list of int
+            Unique identifiers for each set of requested hyper-parameters.
+            These will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
+        Returns
+        -------
+        list
+            A list of newly generated configurations
         """
         result = []
         self.send_trial_callback = kwargs['st_callback']
@@ -506,7 +556,17 @@ def generate_multiple_parameters(self, parameter_id_list, **kwargs):

     def generate_parameters(self, parameter_id, **kwargs):
         """
-        generate parameters, if no trial configration for now, self.credit plus 1 to send the config later
+        Generate parameters. If no trial configuration is available for now, ``self.credit`` is increased by 1 so that the configuration can be sent later.
+
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
+        Returns
+        -------
+        dict
+            One newly generated configuration
         """
         if self.first_inf:
             self.trials_result = [None for _ in range(self.inf_batch_size)]
@@ -527,6 +587,7 @@ def generate_parameters(self, parameter_id, **kwargs):

     def _next_round_inference(self):
         """
+        Run an inference to generate the next batch of configurations
         """
         self.finished_trials = 0
         self.model.compute_rewards(self.trials_info, self.trials_result)
@@ -554,8 +615,17 @@ def _next_round_inference(self):

     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
         """
-        receive trial's result. if the number of finished trials equals self.inf_batch_size, start the next update to
-        train the model
+        Receive a trial's result. If the number of finished trials equals ``self.inf_batch_size``, start the next update to
+        train the model.
+
+        Parameters
+        ----------
+        parameter_id : int
+            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
+        parameters : dict
+            Hyper-parameters generated by :meth:`generate_parameters`.
+        value : dict
+            Result from trial (the return value of :func:`nni.report_final_result`).
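+
+        Examples
+        --------
+        A sketch of the trial side that produces ``value`` (hypothetical metric):
+
+        >>> nni.report_final_result(0.93)  # reported by the trial, received here as ``value``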
""" trial_info_idx = self.running_trials.pop(parameter_id, None) assert trial_info_idx is not None @@ -572,7 +642,17 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs): def trial_end(self, parameter_id, success, **kwargs): """ - to deal with trial failure + To deal with trial failure. If a trial fails, it is popped out from ``self.running_trials``, + and the final result of this trial is assigned with the average of the finished trials. + + Parameters + ---------- + parameter_id : int + Unique identifier for hyper-parameters used by this trial. + success : bool + True if the trial successfully completed; False if failed or terminated. + **kwargs + Not used """ if not success: if parameter_id not in self.running_trials: @@ -582,7 +662,7 @@ def trial_end(self, parameter_id, success, **kwargs): assert trial_info_idx is not None # use mean of finished trials as the result of this failed trial values = [val for val in self.trials_result if val is not None] - logger.warning('zql values: %s', values) + logger.warning('In trial_end, values: %s', values) self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0 self.finished_trials += 1 if self.finished_trials == self.inf_batch_size: @@ -590,10 +670,11 @@ def trial_end(self, parameter_id, success, **kwargs): def import_data(self, data): """ - Import additional data for tuning + Import additional data for tuning, not supported yet. Parameters ---------- - data: a list of dictionarys, each of which has at least two keys, 'parameter' and 'value' + data : list + A list of dictionarys, each of which has at least two keys, ``parameter`` and ``value`` """ logger.warning('PPOTuner cannot leverage imported data.') diff --git a/src/sdk/pynni/nni/ppo_tuner/util.py b/src/sdk/pynni/nni/ppo_tuner/util.py index acf704accc..5e553045d7 100644 --- a/src/sdk/pynni/nni/ppo_tuner/util.py +++ b/src/sdk/pynni/nni/ppo_tuner/util.py @@ -94,12 +94,14 @@ def lstm_model(nlstm=128, layer_norm=False): An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example - Parameters: + Parameters ---------- - nlstm: int LSTM hidden state size - layer_norm: bool if True, layer-normalized version of LSTM is used + nlstm : int + LSTM hidden state size + layer_norm : bool + if True, layer-normalized version of LSTM is used - Returns: + Returns ------- function that builds LSTM with a given input tensor / placeholder """ @@ -171,11 +173,15 @@ def adjust_shape(placeholder, data): adjust shape of the data to the shape of the placeholder if possible. If shape is incompatible, AssertionError is thrown - Parameters: - placeholder: tensorflow input placeholder - data: input data to be (potentially) reshaped to be fed into placeholder + Parameters + ---------- + placeholder + tensorflow input placeholder + data + input data to be (potentially) reshaped to be fed into placeholder - Returns: + Returns + ------- reshaped data """ if not isinstance(data, np.ndarray) and not isinstance(data, list): @@ -230,13 +236,16 @@ def observation_placeholder(ob_space, batch_size=None, name='Ob'): """ Create placeholder to feed observations into of the size appropriate to the observation space - Parameters: + Parameters ---------- - ob_space: gym.Space observation space - batch_size: int size of the batch to be fed into input. Can be left None in most cases. - name: str name of the placeholder - - Returns: + ob_space : gym.Space + observation space + batch_size : int + size of the batch to be fed into input. 
     """
diff --git a/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py b/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
index 527bd41d34..e61b9fe8d0 100644
--- a/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
+++ b/src/sdk/pynni/nni/smac_tuner/convert_ss_to_scenario.py
@@ -24,11 +24,14 @@

 def get_json_content(file_path):
-    """Load json file content
+    """
+    Load json file content
+
     Parameters
     ----------
     file_path:
         path to the file
+
     Raises
     ------
     TypeError
@@ -43,7 +46,8 @@ def get_json_content(file_path):

 def generate_pcs(nni_search_space_content):
-    """Generate the Parameter Configuration Space (PCS) which defines the
+    """
+    Generate the Parameter Configuration Space (PCS) which defines the
     legal ranges of the parameters to be optimized and their default values.
     Generally, the format is:
     # parameter_name categorical {value_1, ..., value_N} [default value]
     # parameter_name ordinal {value_1, ..., value_N} [default value]
     # parameter_name integer [min_value, max_value] [default value]
     # parameter_name integer [min_value, max_value] [default value] log
     # parameter_name real [min_value, max_value] [default value]
     # parameter_name real [min_value, max_value] [default value] log
     Reference: https://automl.github.io/SMAC3/stable/options.html
+
     Parameters
     ----------
     nni_search_space_content: search_space
         The search space in this experiment in nni
+
     Returns
     -------
     Parameter Configuration Space (PCS)
         the legal ranges of the parameters to be optimized and their default values
+
     Raises
     ------
     RuntimeError
@@ -122,7 +129,8 @@ def dump_categorical(fd, key, categories):

 def generate_scenario(ss_content):
-    """Generate the scenario. The scenario-object (smac.scenario.scenario.Scenario) is used to configure SMAC and
+    """
+    Generate the scenario. The scenario-object (smac.scenario.scenario.Scenario) is used to configure SMAC and
     can be constructed either by providing an actual scenario-object, or by specifying the options in a scenario file.
     Reference: https://automl.github.io/SMAC3/stable/options.html
     The format of the scenario file is one option per line:
@@ -191,6 +199,7 @@ def generate_scenario(ss_content):
         wallclock_limit: int
             Maximum amount of wallclock-time used for optimization. Default: inf.
     Use default because this is controlled by nni
+
     Returns
     -------
     Scenario:
diff --git a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
index 4e2f876b9e..eed1b27946 100644
--- a/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
+++ b/src/sdk/pynni/nni/smac_tuner/smac_tuner.py
@@ -41,13 +41,17 @@

 class SMACTuner(Tuner):
     """
-    Parameters
-    ----------
-    optimize_mode: str
-        optimize mode, 'maximize' or 'minimize', by default 'maximize'
+    This is a wrapper of [SMAC](https://github.com/automl/SMAC3) following the NNI tuner interface.
+    It only supports ``SMAC`` mode and does not support multiple instances of SMAC3 (i.e.,
+    running the same configuration multiple times).
     """
     def __init__(self, optimize_mode="maximize"):
-        """Constructor"""
+        """
+        Parameters
+        ----------
+        optimize_mode : str
+            Optimize mode, 'maximize' or 'minimize', by default 'maximize'
+        """
         self.logger = logging.getLogger(
             self.__module__ + "." + self.__class__.__name__)
         self.optimize_mode = OptimizeMode(optimize_mode)
@@ -61,11 +65,14 @@ def __init__(self, optimize_mode="maximize"):
         self.cs = None

     def _main_cli(self):
-        """Main function of SMAC for CLI interface
+        """
+        Main function of SMAC for CLI interface. Some initializations of the wrapped SMAC are done
+        in this function.
+
         Returns
         -------
-        instance
-            optimizer
+        obj
+            The object of the SMAC optimizer
         """
         self.logger.info("SMAC call: %s", " ".join(sys.argv))
@@ -126,20 +133,23 @@ def _main_cli(self):

     def update_search_space(self, search_space):
         """
-        NOTE: updating search space is not supported.
+        Convert search_space to the format that ``SMAC3`` can recognize; thus, not all the search space types
+        are supported. In this function, we also do the initialization of ``SMAC3``, i.e., calling ``self._main_cli``.
+
+        NOTE: updating search space during experiment running is not supported.
+
         Parameters
         ----------
-        search_space: dict
-            search space
+        search_space : dict
+            The format can be found in the search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
         """
-        # TODO: this is ugly, we put all the initialization work in this method, because initialization relies
-        # on search space, also because update_search_space is called at the beginning.
-
         if not self.update_ss_done:
             self.categorical_dict = generate_scenario(search_space)
             if self.categorical_dict is None:
                 raise RuntimeError('categorical dict is not correctly returned after parsing search space.')
+            # TODO: this is ugly, we put all the initialization work in this method, because initialization relies
+            # on search space, also because update_search_space is called at the beginning.
             self.optimizer = self._main_cli()
             self.smbo_solver = self.optimizer.solver
             self.loguniform_key = {key for key in search_space.keys() if search_space[key]['_type'] == 'loguniform'}
@@ -148,19 +158,23 @@ def update_search_space(self, search_space):
         self.logger.warning('update search space is not supported.')

     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
-        """receive_trial_result
+        """
+        Receive a trial's final performance result reported through :func:`nni.report_final_result` by the trial.
+        The result is used to update the underlying SMAC optimizer.
+
         Parameters
         ----------
-        parameter_id: int
-            parameter id
-        parameters:
-            parameters
-        value:
-            value
+        parameter_id : int
+            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
+        parameters : dict
+            Hyper-parameters generated by :meth:`generate_parameters`.
+        value : dict
+            Result from trial (the return value of :func:`nni.report_final_result`).
+
         Raises
         ------
         RuntimeError
-            Received parameter id not in total_data
+            Received parameter id not in ``self.total_data``
         """
         reward = extract_scalar_reward(value)
         if self.optimize_mode is OptimizeMode.Maximize:
@@ -176,14 +190,16 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs):

     def param_postprocess(self, challenger_dict):
         """
-        Postprocessing for a set of parameter includes:
-        1. Convert the values of type `loguniform` back to their initial range.
-        2. Convert categorical: categorical values in search space are changed to list of numbers before,
-        those original values will be changed back in this function.
+        Postprocessing for a set of hyperparameters includes:
+        1. Convert the values of type ``loguniform`` back to their initial range.
+        2. Convert ``categorical``: categorical values in search space are changed to list of numbers before,
+           those original values will be changed back in this function.
+
         Parameters
         ----------
-        challenger_dict: dict
+        challenger_dict : dict
             challenger dict
+
         Returns
         -------
         dict
@@ -203,15 +219,21 @@ def param_postprocess(self, challenger_dict):
         return converted_dict

     def generate_parameters(self, parameter_id, **kwargs):
-        """generate one instance of hyperparameters
+        """
+        Generate one instance of hyperparameters (i.e., one configuration),
+        taken from SMAC3's ``challengers``.
+
         Parameters
         ----------
-        parameter_id: int
-            parameter id
+        parameter_id : int
+            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
         Returns
         -------
-        list
-            new generated parameters
+        dict
+            One newly generated configuration
         """
         if self.first_one:
             init_challenger = self.smbo_solver.nni_smac_start()
@@ -224,15 +246,23 @@ def generate_parameters(self, parameter_id, **kwargs):
         return self.param_postprocess(challenger.get_dictionary())

     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
-        """generate mutiple instances of hyperparameters
+        """
+        Generate multiple instances of hyperparameters. For the first request,
+        retrieve the instances from the initial challengers; otherwise, request
+        new challengers and retrieve instances from them.
+
         Parameters
         ----------
-        parameter_id_list: list
-            list of parameter id
+        parameter_id_list : list of int
+            Unique identifiers for each set of requested hyper-parameters.
+            These will later be used in :meth:`receive_trial_result`.
+        **kwargs
+            Not used
+
         Returns
         -------
         list
-            list of new generated parameters
+            A list of newly generated configurations
         """
         if self.first_one:
             params = []
@@ -254,11 +284,12 @@ def generate_multiple_parameters(self, parameter_id_list, **kwargs):

     def import_data(self, data):
         """
-        Import additional data for tuning
+        Import additional data for tuning.

         Parameters
         ----------
-        data: list of dict
-            Each of which has at least two keys, `parameter` and `value`.
+        data : list of dict
+            Each of which has at least two keys, ``parameter`` and ``value``.
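+
+        Examples
+        --------
+        A sketch of the expected format (hypothetical parameter names and values):
+
+        >>> tuner.import_data([
+        ...     {'parameter': {'x': 0.5}, 'value': 0.93},
+        ... ])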
""" _completed_num = 0 for trial_info in data: diff --git a/src/sdk/pynni/nni/tuner.py b/src/sdk/pynni/nni/tuner.py index a39ed9ff11..6a280a2856 100644 --- a/src/sdk/pynni/nni/tuner.py +++ b/src/sdk/pynni/nni/tuner.py @@ -76,10 +76,11 @@ class Tuner(Recoverable): Builtin tuners: :class:`~nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner` :class:`~nni.evolution_tuner.evolution_tuner.EvolutionTuner` - :class:`~nni.smac_tuner.smac_tuner.SMACTuner` - :class:`~nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner` + :class:`~nni.smac_tuner.SMACTuner` + :class:`~nni.gridsearch_tuner.GridSearchTuner` :class:`~nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner` - :class:`~nni.metis_tuner.metis_tuner.MetisTuner` + :class:`~nni.metis_tuner.mets_tuner.MetisTuner` + :class:`~nni.ppo_tuner.PPOTuner` :class:`~nni.gp_tuner.gp_tuner.GPTuner` """ From 187494aafdda77bc22a5309c068528a132db26fc Mon Sep 17 00:00:00 2001 From: Yan Ni Date: Wed, 13 Nov 2019 08:15:03 +0800 Subject: [PATCH 14/14] update docstring doc (#1645) --- docs/en_US/Tutorial/Contributing.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/en_US/Tutorial/Contributing.md b/docs/en_US/Tutorial/Contributing.md index acfd0a5068..2dea24bc6c 100644 --- a/docs/en_US/Tutorial/Contributing.md +++ b/docs/en_US/Tutorial/Contributing.md @@ -40,6 +40,9 @@ A person looking to contribute can take up an issue by claiming it as a comment/ ## Code Styles & Naming Conventions * We follow [PEP8](https://www.python.org/dev/peps/pep-0008/) for Python code and naming conventions, do try to adhere to the same when making a pull request or making a change. One can also take the help of linters such as `flake8` or `pylint` * We also follow [NumPy Docstring Style](https://www.sphinx-doc.org/en/master/usage/extensions/example_numpy.html#example-numpy) for Python Docstring Conventions. During the [documentation building](Contributing.md#documentation), we use [sphinx.ext.napoleon](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) to generate Python API documentation from Docstring. +* For docstrings, please refer to [numpydoc docstring guide](https://numpydoc.readthedocs.io/en/latest/format.html) and [pandas docstring guide](https://python-sprints.github.io/pandas/guide/pandas_docstring.html) + * For function docstring, **description**, **Parameters**, and **Returns**/**Yields** are mandatory. + * For class docstring, **description**, **Attributes** are mandatory. ## Documentation Our documentation is built with [sphinx](http://sphinx-doc.org/), supporting [Markdown](https://guides.github.com/features/mastering-markdown/) and [reStructuredText](http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html) format. All our documentations are placed under [docs/en_US](https://github.com/Microsoft/nni/tree/master/docs). @@ -48,4 +51,4 @@ Our documentation is built with [sphinx](http://sphinx-doc.org/), supporting [Ma * For links, please consider using __relative paths__ first. However, if the documentation is written in Markdown format, and: * It's an image link which needs to be formatted with embedded html grammar, please use global URL like `https://user-images.githubusercontent.com/44491713/51381727-e3d0f780-1b4f-11e9-96ab-d26b9198ba65.png`, which can be automatically generated by dragging picture onto [Github Issue](https://github.com/Microsoft/nni/issues/new) Box. - * It cannot be re-formatted by sphinx, such as source code, please use its global URL. 
\ No newline at end of file
+    * It cannot be re-formatted by sphinx, such as source code; in that case, please use its global URL. For source code that links to our github repo, please use URLs rooted at `https://github.com/Microsoft/nni/tree/master/` ([mnist.py](https://github.com/Microsoft/nni/blob/master/examples/trials/mnist/mnist.py) for example).
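
For reference, a minimal docstring that follows the numpydoc conventions referenced above; the function and its parameters are hypothetical, chosen only to illustrate the mandatory **description**, **Parameters**, and **Returns** sections:

```python
def scale(value, factor=2.0):
    """
    Scale ``value`` by ``factor``.

    Parameters
    ----------
    value : float
        The number to scale.
    factor : float
        The multiplier, by default 2.0.

    Returns
    -------
    float
        The scaled value.
    """
    return value * factor
```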