diff --git a/docs/en_US/Compressor/Pruner.md b/docs/en_US/Compressor/Pruner.md index e059834eca..9efcce8e7b 100644 --- a/docs/en_US/Compressor/Pruner.md +++ b/docs/en_US/Compressor/Pruner.md @@ -38,7 +38,7 @@ Tensorflow code ```python from nni.compression.tensorflow import LevelPruner config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }] -pruner = LevelPruner(model_graph, config_list) +pruner = LevelPruner(model, config_list) pruner.compress() ``` @@ -117,17 +117,6 @@ FPGMPruner prune filters with the smallest geometric median. ### Usage -Tensorflow code -```python -from nni.compression.tensorflow import FPGMPruner -config_list = [{ - 'sparsity': 0.5, - 'op_types': ['Conv2D'] -}] -pruner = FPGMPruner(model, config_list) -pruner.compress() -``` - PyTorch code ```python from nni.compression.torch import FPGMPruner @@ -146,11 +135,6 @@ pruner.compress() .. autoclass:: nni.compression.torch.FPGMPruner ``` -##### Tensorflow -```eval_rst -.. autoclass:: nni.compression.tensorflow.FPGMPruner -``` - ## L1Filter Pruner This is an one-shot pruner, In ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710), authors Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf. @@ -383,12 +367,6 @@ You can view [example](https://github.com/microsoft/nni/blob/master/examples/mod .. autoclass:: nni.compression.torch.AGPPruner ``` -##### Tensorflow - -```eval_rst -.. autoclass:: nni.compression.tensorflow.AGPPruner -``` - *** ## NetAdapt Pruner @@ -620,4 +598,4 @@ pruner.compress(eval_args=[model], finetune_args=[model]) ```eval_rst .. 
autoclass:: nni.compression.torch.SensitivityPruner -``` \ No newline at end of file +``` diff --git a/docs/zh_CN/Compressor/Pruner.md b/docs/zh_CN/Compressor/Pruner.md index f78b8e0f1c..d11829e4b5 100644 --- a/docs/zh_CN/Compressor/Pruner.md +++ b/docs/zh_CN/Compressor/Pruner.md @@ -37,7 +37,7 @@ TensorFlow 代码 ```python from nni.compression.tensorflow import LevelPruner config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }] -pruner = LevelPruner(model_graph, config_list) +pruner = LevelPruner(model, config_list) pruner.compress() ``` @@ -102,16 +102,6 @@ pruner.compress() ### 用法 -TensorFlow 代码 -```python -from nni.compression.tensorflow import FPGMPruner -config_list = [{ - 'sparsity': 0.5, - 'op_types': ['Conv2D'] -}] -pruner = FPGMPruner(model, config_list) -pruner.compress() -``` PyTorch 代码 ```python from nni.compression.torch import FPGMPruner diff --git a/examples/model_compress/model_prune_tf.py b/examples/model_compress/model_prune_tf.py new file mode 100644 index 0000000000..99e8278df4 --- /dev/null +++ b/examples/model_compress/model_prune_tf.py @@ -0,0 +1,82 @@ +import argparse + +import tensorflow as tf + +import nni.compression.tensorflow + +prune_config = { + 'level': { + 'dataset_name': 'mnist', + 'model_name': 'naive', + 'pruner_class': nni.compression.tensorflow.LevelPruner, + 'config_list': [{ + 'sparsity': 0.9, + 'op_types': ['default'], + }] + }, +} + + +def get_dataset(dataset_name='mnist'): + assert dataset_name == 'mnist' + + (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() + x_train = x_train[..., tf.newaxis] / 255.0 + x_test = x_test[..., tf.newaxis] / 255.0 + return (x_train, y_train), (x_test, y_test) + + +def create_model(model_name='naive'): + assert model_name == 'naive' + return tf.keras.Sequential([ + tf.keras.layers.Conv2D(filters=20, kernel_size=5), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.ReLU(), + tf.keras.layers.MaxPool2D(pool_size=2), + tf.keras.layers.Conv2D(filters=20, 
kernel_size=5), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.ReLU(), + tf.keras.layers.MaxPool2D(pool_size=2), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(units=500), + tf.keras.layers.ReLU(), + tf.keras.layers.Dense(units=10), + tf.keras.layers.Softmax() + ]) + + +def create_pruner(model, pruner_name): + pruner_class = prune_config[pruner_name]['pruner_class'] + config_list = prune_config[pruner_name]['config_list'] + return pruner_class(model, config_list) + + +def main(args): + model_name = prune_config[args.pruner_name]['model_name'] + dataset_name = prune_config[args.pruner_name]['dataset_name'] + train_set, test_set = get_dataset(dataset_name) + model = create_model(model_name) + + optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9, decay=1e-4) + model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy']) + + print('start training') + model.fit(train_set[0], train_set[1], batch_size=args.batch_size, epochs=args.pretrain_epochs, validation_data=test_set) + + print('start model pruning') + optimizer_finetune = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, decay=1e-4) + pruner = create_pruner(model, args.pruner_name) + model = pruner.compress() + model.compile(optimizer=optimizer_finetune, loss='sparse_categorical_crossentropy', metrics=['accuracy']) + model.fit(train_set[0], train_set[1], batch_size=args.batch_size, epochs=args.prune_epochs, validation_data=test_set) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--pruner_name', type=str, default='level') + parser.add_argument('--batch_size', type=int, default=256) + parser.add_argument('--pretrain_epochs', type=int, default=10) + parser.add_argument('--prune_epochs', type=int, default=10) + + args = parser.parse_args() + main(args) diff --git a/src/sdk/pynni/nni/compression/tensorflow/__init__.py b/src/sdk/pynni/nni/compression/tensorflow/__init__.py index 45b6c4e7b8..00d41ee55b 
100644 --- a/src/sdk/pynni/nni/compression/tensorflow/__init__.py +++ b/src/sdk/pynni/nni/compression/tensorflow/__init__.py @@ -1,6 +1,5 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -from .compressor import LayerInfo, Compressor, Pruner, Quantizer -from .builtin_pruners import * -from .builtin_quantizers import * +from .compressor import Compressor, Pruner +from .pruning import * diff --git a/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py b/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py deleted file mode 100644 index 89ea1a722d..0000000000 --- a/src/sdk/pynni/nni/compression/tensorflow/builtin_pruners.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -import logging -import numpy as np -import tensorflow as tf -from .compressor import Pruner - -__all__ = ['LevelPruner', 'AGPPruner', 'FPGMPruner'] - -_logger = logging.getLogger(__name__) - - -class LevelPruner(Pruner): - """ - Parameters - ---------- - model : tensorflow model - Model to be pruned - config_list : list - Supported keys: - - sparsity : This is to specify the sparsity operations to be compressed to. - - op_types : Operation types to prune. - """ - def __init__(self, model, config_list): - super().__init__(model, config_list) - self.mask_list = {} - self.if_init_list = {} - - def calc_mask(self, layer, config): - weight = layer.weight - op_name = layer.name - if self.if_init_list.get(op_name, True): - threshold = tf.contrib.distributions.percentile(tf.abs(weight), config['sparsity'] * 100) - mask = tf.cast(tf.math.greater(tf.abs(weight), threshold), weight.dtype) - self.mask_list.update({op_name: mask}) - self.if_init_list.update({op_name: False}) - else: - mask = self.mask_list[op_name] - return mask - - -class AGPPruner(Pruner): - """ - Parameters - ---------- - model : torch.nn.Module - Model to be pruned. 
- config_list : listlist - Supported keys: - - initial_sparsity: This is to specify the sparsity when compressor starts to compress. - - final_sparsity: This is to specify the sparsity when compressor finishes to compress. - - start_epoch: This is to specify the epoch number when compressor starts to compress, default start from epoch 0. - - end_epoch: This is to specify the epoch number when compressor finishes to compress. - - frequency: This is to specify every *frequency* number epochs compressor compress once, default frequency=1. - """ - - def __init__(self, model, config_list): - super().__init__(model, config_list) - self.mask_list = {} - self.if_init_list = {} - self.now_epoch = tf.Variable(0) - self.assign_handler = [] - - def calc_mask(self, layer, config): - weight = layer.weight - op_name = layer.name - start_epoch = config.get('start_epoch', 0) - freq = config.get('frequency', 1) - if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) and ( - self.now_epoch - start_epoch) % freq == 0: - target_sparsity = self.compute_target_sparsity(config) - threshold = tf.contrib.distributions.percentile(weight, target_sparsity * 100) - # stop gradient in case gradient change the mask - mask = tf.stop_gradient(tf.cast(tf.math.greater(weight, threshold), weight.dtype)) - self.assign_handler.append(tf.assign(weight, weight * mask)) - self.mask_list.update({op_name: tf.constant(mask)}) - self.if_init_list.update({op_name: False}) - else: - mask = self.mask_list[op_name] - return mask - - def compute_target_sparsity(self, config): - end_epoch = config.get('end_epoch', 1) - start_epoch = config.get('start_epoch', 0) - freq = config.get('frequency', 1) - final_sparsity = config.get('final_sparsity', 0) - initial_sparsity = config.get('initial_sparsity', 0) - - if end_epoch <= start_epoch or initial_sparsity >= final_sparsity: - _logger.warning('your end epoch <= start epoch or initial_sparsity >= final_sparsity') - return final_sparsity - - now_epoch = 
tf.minimum(self.now_epoch, tf.constant(end_epoch)) - span = int(((end_epoch - start_epoch - 1) // freq) * freq) - assert span > 0 - base = tf.cast(now_epoch - start_epoch, tf.float32) / span - target_sparsity = (final_sparsity + - (initial_sparsity - final_sparsity) * - (tf.pow(1.0 - base, 3))) - return target_sparsity - - def update_epoch(self, epoch, sess): - sess.run(self.assign_handler) - sess.run(tf.assign(self.now_epoch, int(epoch))) - for k in self.if_init_list: - self.if_init_list[k] = True - - -class FPGMPruner(Pruner): - """ - Parameters - ---------- - model : tensorflow model - Model to be pruned - config_list : list - Supported keys: - - sparsity : percentage of convolutional filters to be pruned. - - op_types : Only Conv2d is supported in FPGM Pruner. - """ - def __init__(self, model, config_list): - super().__init__(model, config_list) - self.mask_dict = {} - self.assign_handler = [] - self.epoch_pruned_layers = set() - - def calc_mask(self, layer, config): - """ - Supports Conv1D, Conv2D - filter dimensions for Conv1D: - LEN: filter length - IN: number of input channel - OUT: number of output channel - - filter dimensions for Conv2D: - H: filter height - W: filter width - IN: number of input channel - OUT: number of output channel - - Parameters - ---------- - layer : LayerInfo - calculate mask for `layer`'s weight - config : dict - the configuration for generating the mask - """ - - weight = layer.weight - op_type = layer.type - op_name = layer.name - assert 0 <= config.get('sparsity') < 1 - assert op_type in ['Conv1D', 'Conv2D'] - assert op_type in config['op_types'] - - if layer.name in self.epoch_pruned_layers: - assert layer.name in self.mask_dict - return self.mask_dict.get(layer.name) - - try: - w = tf.stop_gradient(tf.transpose(tf.reshape(weight, (-1, weight.shape[-1])), [1, 0])) - masks = np.ones(w.shape) - num_filters = w.shape[0] - num_prune = int(num_filters * config.get('sparsity')) - if num_filters < 2 or num_prune < 1: - return masks - 
min_gm_idx = self._get_min_gm_kernel_idx(w, num_prune) - - for idx in min_gm_idx: - masks[idx] = 0. - finally: - masks = tf.reshape(tf.transpose(masks, [1, 0]), weight.shape) - masks = tf.Variable(masks) - self.mask_dict.update({op_name: masks}) - self.epoch_pruned_layers.add(layer.name) - - return masks - - def _get_min_gm_kernel_idx(self, weight, n): - dist_list = [] - for out_i in range(weight.shape[0]): - dist_sum = self._get_distance_sum(weight, out_i) - dist_list.append((dist_sum, out_i)) - min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n] - return [x[1] for x in min_gm_kernels] - - def _get_distance_sum(self, weight, out_idx): - anchor_w = tf.tile(tf.expand_dims(weight[out_idx], 0), [weight.shape[0], 1]) - x = weight - anchor_w - x = tf.math.reduce_sum((x*x), -1) - x = tf.math.sqrt(x) - return tf.math.reduce_sum(x) - - def update_epoch(self, epoch): - self.epoch_pruned_layers = set() diff --git a/src/sdk/pynni/nni/compression/tensorflow/builtin_quantizers.py b/src/sdk/pynni/nni/compression/tensorflow/builtin_quantizers.py deleted file mode 100644 index 3f54cbfb12..0000000000 --- a/src/sdk/pynni/nni/compression/tensorflow/builtin_quantizers.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. 
- -import logging -import tensorflow as tf -from .compressor import Quantizer - -__all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer'] - -_logger = logging.getLogger(__name__) - - -class NaiveQuantizer(Quantizer): - """quantize weight to 8 bits - """ - def __init__(self, model, config_list): - super().__init__(model, config_list) - self.layer_scale = {} - - def quantize_weight(self, weight, config, op_name, **kwargs): - new_scale = tf.reduce_max(tf.abs(weight)) / 127 - scale = tf.maximum(self.layer_scale.get(op_name, tf.constant(0.0)), new_scale) - self.layer_scale[op_name] = scale - orig_type = weight.dtype - return tf.cast(tf.cast(weight / scale, tf.int8), orig_type) * scale - - -class QAT_Quantizer(Quantizer): - """Quantizer using the Quantization and Training scheme, as defined in: - Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference - http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf - """ - def __init__(self, model, config_list): - """ - config_list: supported keys: - - q_bits - """ - super().__init__(model, config_list) - - def quantize_weight(self, weight, config, **kwargs): - a = tf.stop_gradient(tf.reduce_min(weight)) - b = tf.stop_gradient(tf.reduce_max(weight)) - n = tf.cast(2 ** config['q_bits'], tf.float32) - scale = b-a/(n-1) - - # use gradient_override_map to change round to idetity for gradient - with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}): - qw = tf.round((weight-a)/scale)*scale +a - - return qw - - -class DoReFaQuantizer(Quantizer): - """Quantizer using the DoReFa scheme, as defined in: - Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients - (https://arxiv.org/abs/1606.06160) - """ - def __init__(self, model, config_list): - """ - config_list: supported keys: - - q_bits - """ - super().__init__(model, config_list) - - def quantize_weight(self, weight, config, 
**kwargs): - a = tf.math.tanh(weight) - b = a/(2*tf.reduce_max(tf.abs(weight))) + 0.5 - - scale = pow(2, config['q_bits'] - 1) - # use gradient_override_map to change round to idetity for gradient - with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}): - qw = tf.round(b*scale)/scale - r_qw = 2 * qw - 1 - return r_qw diff --git a/src/sdk/pynni/nni/compression/tensorflow/compressor.py b/src/sdk/pynni/nni/compression/tensorflow/compressor.py index 62580738a3..bbe4a21a52 100644 --- a/src/sdk/pynni/nni/compression/tensorflow/compressor.py +++ b/src/sdk/pynni/nni/compression/tensorflow/compressor.py @@ -1,204 +1,300 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +""" +Abstract base classes for TensorFlow model compression. +""" + import logging import tensorflow as tf from . import default_layers -tf.config.experimental_run_functions_eagerly(True) _logger = logging.getLogger(__name__) class LayerInfo: - def __init__(self, keras_layer): - self.keras_layer = keras_layer - self.name = keras_layer.name - self.type = default_layers.get_op_type(type(keras_layer)) - self.weight_index = default_layers.get_weight_index(self.type) - if self.weight_index is not None: - self.weight = keras_layer.weights[self.weight_index] - self._call = None + """ + This structure contains all information needed to compress a TensorFlow ``Layer``. + + + Attributes + ---------- + layer : tf.keras.layers.Layer + The layer. + name : str + The layer's name. Note that it's local to sub-model and may differ from its attribute name. + type : str + Name of the layer's class. + path : list of str/int + The layer object's and its parents' attribute name / list index. + For example, if the path is `['cells', 2, 'conv']`, then the layer can be accessed as `model.cells[2].conv`. + config : JSON object + Selected configuration for this layer. The format is detailed in tutorial. + + Parameters + ---------- + layer : tf.keras.layers.Layer + See attributes section. 
+ path : list of str/int + See attributes section. + """ + + def __init__(self, layer, path=None): + self.layer = layer + self.name = layer.name + self.type = type(layer).__name__ + self.path = path + self.config = None + class Compressor: """ - Abstract base TensorFlow compressor + Common base class for all compressors. + + This class is designed for other base classes. + Algorithms should inherit ``Pruner`` or ``Quantizer`` instead. + + + Attributes + ---------- + bound_model : tf.keras.Model + Compressed user model. + wrappers : list of tf.keras.Model + A wrapper is an instrumented TF ``Layer``, in ``Model`` format. + The list is ordered by preorder traversal. + + Parameters + ---------- + LayerWrapperClass : a class derive from Model + The class used to instrument layers. + model : tf.keras.Model + The user model to be compressed. + config_list : list of JSON object + User configuration. The format is detailed in tutorial. """ - def __init__(self, model, config_list): - """ - Record necessary info in class members + def __init__(self, LayerWrapperClass, model, config_list): + assert isinstance(model, tf.keras.Model) + self.validate_config(model, config_list) - Parameters - ---------- - model : keras model - the model user wants to compress - config_list : list - the configurations that users specify for compression - """ self.bound_model = model - self.config_list = config_list - self.modules_to_compress = [] + self.wrappers = [] - def detect_modules_to_compress(self): - """ - detect all modules should be compressed, and save the result in `self.modules_to_compress`. + for layer_info in _detect_layers_to_compress(model, config_list): + self.wrappers.append(LayerWrapperClass(layer_info, self)) + if not self.wrappers: + _logger.warning('Nothing is configured to compress, please check your model and config list') - The model will be instrumented and user should never edit it after calling this method. 
- """ - if self.modules_to_compress is None: - self.modules_to_compress = [] - for keras_layer in self.bound_model.layers: - layer = LayerInfo(keras_layer) - config = self.select_config(layer) - if config is not None: - self.modules_to_compress.append((layer, config)) - return self.modules_to_compress + _instrument_model(model, self.wrappers) - def compress(self): + def set_wrappers_attribute(self, name, value): """ - Compress the model with algorithm implemented by subclass. - - The model will be instrumented and user should never edit it after calling this method. - `self.modules_to_compress` records all the to-be-compressed layers + Call ``setattr`` on all wrappers. """ - modules_to_compress = self.detect_modules_to_compress() - for layer, config in modules_to_compress: - self._instrument_layer(layer, config) - return self.bound_model + for wrapper in self.wrappers: + setattr(wrapper, name, value) - def get_modules_to_compress(self): - """ - To obtain all the to-be-compressed layers. - Returns - ------- - self.modules_to_compress : list - a list of the layers, each of which is a tuple (`layer`, `config`), - `layer` is `LayerInfo`, `config` is a `dict` - """ - return self.modules_to_compress +class Pruner(Compressor): + """ + Base class for pruning algorithms. - def select_config(self, layer): - """ - Find the configuration for `layer` by parsing `self.config_list` + End users should use ``compress`` and callback APIs (WIP) to prune their models. - Parameters - ---------- - layer: LayerInfo - one layer + The underlying model is instrumented upon initialization of pruner object. + So if you want to pre-train the model, train it before creating pruner object. 
- Returns - ------- - ret : config or None - the retrieved configuration for this layer, if None, this layer should - not be compressed - """ - ret = None - if layer.type is None: - return None - for config in self.config_list: - config = config.copy() - config['op_types'] = self._expand_config_op_types(config) - if layer.type not in config['op_types']: - continue - if config.get('op_names') and layer.name not in config['op_names']: - continue - ret = config - if ret is None or ret.get('exclude'): - return None - return ret + The compressed model can only execute in eager mode. - def update_epoch(self, epoch): - """ - If user want to update model every epoch, user can override this method. - This method should be called at the beginning of each epoch + Algorithm developers should override ``calc_masks`` method to specify pruning strategy. - Parameters - ---------- - epoch : num - the current epoch number - """ + Parameters + ---------- + model : tf.keras.Model + The user model to prune. + config_list : list of JSON object + User configuration. The format is detailed in tutorial. + """ + def __init__(self, model, config_list): + super().__init__(PrunerLayerWrapper, model, config_list) + #self.callback = PrunerCallback(self) - def step(self): - """ - If user want to update mask every step, user can override this method + def compress(self): """ + Apply compression on a pre-trained model. + If you want to prune the model during training, use callback API (WIP) instead. - def _instrument_layer(self, layer, config): + Returns + ------- + tf.keras.Model + The compressed model, for convenience. This is exactly the same object to constructor argument. 
""" - This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer` + self._update_mask() + return self.bound_model - Parameters - ---------- - layer : LayerInfo - the layer to instrument the compression operation - config : dict - the configuration for compressing this layer + def calc_masks(self, wrapper, **kwargs): """ - raise NotImplementedError() - - def _expand_config_op_types(self, config): - if config is None: - return [] - op_types = [] - - for op_type in config.get('op_types', []): - if op_type == 'default': - op_types.extend(default_layers.default_layers) - else: - op_types.append(op_type) - return op_types + Abstract method to be overridden by algorithm. End users should ignore it. - -class Pruner(Compressor): - """ - Abstract base TensorFlow pruner - """ - - def calc_mask(self, layer, config): - """ - Pruners should overload this method to provide mask for weight tensors. - The mask must have the same shape and type comparing to the weight. - It will be applied with `mul()` operation on the weight. - This method is effectively hooked to `forward()` method of the model. + If the callback is set up, this method will be invoked at end of each training minibatch. + If not, it will only be called when end user invokes ``compress``. Parameters ---------- - layer : LayerInfo - calculate mask for `layer`'s weight - config : dict - the configuration for generating the mask - """ - raise NotImplementedError("Pruners must overload calc_mask()") + wrapper : PrunerLayerWrapper + The instrumented layer. + **kwargs + Reserved for forward compatibility. - def _instrument_layer(self, layer, config): - """ - Create a wrapper forward function to replace the original one. - - Parameters - ---------- - layer : LayerInfo - the layer to instrument the mask - config : dict - the configuration for generating the mask + Returns + ------- + dict of (str, tf.Tensor), or None + The key is weight ``Variable``'s name. 
The value is a mask ``Tensor`` of weight's shape and dtype. + If a weight's key does not appear in the return value, that weight will not be pruned. + Returning ``None`` means the mask is not changed since last time. + Weight names are globally unique, e.g. `model/conv_1/kernel:0`. """ - layer._call = layer.keras_layer.call + # TODO: maybe it should be able to calc on weight-granularity, beside from layer-granularity + raise NotImplementedError("Pruners must overload calc_masks()") - def new_call(*inputs): - weights = [x.numpy() for x in layer.keras_layer.weights] - mask = self.calc_mask(layer, config) - weights[layer.weight_index] = weights[layer.weight_index] * mask - layer.keras_layer.set_weights(weights) - ret = layer._call(*inputs) - return ret + def _update_mask(self): + for wrapper_idx, wrapper in enumerate(self.wrappers): + masks = self.calc_masks(wrapper, wrapper_idx=wrapper_idx) + if masks is not None: + wrapper.masks = masks - layer.keras_layer.call = new_call -class Quantizer(Compressor): +class PrunerLayerWrapper(tf.keras.Model): """ - Abstract base TensorFlow quantizer + Instrumented TF layer. + + Wrappers will be passed to pruner's ``calc_masks`` API, + and the pruning algorithm should use wrapper's attributes to calculate masks. + + Once instrumented, underlying layer's weights will get **modified** by masks before forward pass. + + Attributes + ---------- + layer_info : LayerInfo + All static information of the original layer. + layer : tf.keras.layers.Layer + The original layer. + config : JSON object + Selected configuration. The format is detailed in tutorial. + pruner : Pruner + Bound pruner object. + masks : dict of (str, tf.Tensor) + Current masks. The key is weight's name and the value is mask tensor. + On initialization, `masks` is an empty dict, which means no weight is pruned. + Afterwards, `masks` is the last return value of ``Pruner.calc_masks``. + See ``Pruner.calc_masks`` for details. 
""" - - def quantize_weight(self, weight, config, op, op_type, op_name): - raise NotImplementedError("Quantizer must overload quantize_weight()") + def __init__(self, layer_info, pruner): + super().__init__() + self.layer_info = layer_info + self.layer = layer_info.layer + self.config = layer_info.config + self.pruner = pruner + self.masks = {} + _logger.info('Layer detected to compress: %s', self.layer.name) + + def call(self, *inputs): + new_weights = [] + for weight in self.layer.weights: + mask = self.masks.get(weight.name) + if mask is not None: + new_weights.append(tf.math.multiply(weight, mask).numpy()) + else: + new_weights.append(weight.numpy()) + self.layer.set_weights(new_weights) + return self.layer(*inputs) + + +# TODO: designed to replace `patch_optimizer` +#class PrunerCallback(tf.keras.callbacks.Callback): +# def __init__(self, pruner): +# super().__init__() +# self._pruner = pruner +# +# def on_train_batch_end(self, batch, logs=None): +# self._pruner.update_mask() + + +def _detect_layers_to_compress(model, config_list): + # Returns list of LayerInfo. + located_layers = _locate_layers(model) + ret = [] + for layer in model.layers: + config = _select_config(LayerInfo(layer), config_list) + if config is not None: + if id(layer) not in located_layers: + _logger.error('Failed to locate layer %s in model. The layer will not be compressed. ' + 'This is a bug in NNI, feel free to fire an issue.', layer.name) + continue + layer_info = located_layers[id(layer)] + layer_info.config = config + ret.append(layer_info) + return ret + +def _locate_layers(model, cur_path=[]): + # Find out how to access layers from model object. + # Returns dict of (layer's object ID, LayerInfo). + # This function is required because TF framework does not track layer's attribute name, + # and to my knowledge `Layer.name` is only useful for read-only access. + # `cur_path`s format is documented in `LayerInfo.path`. + # TODO: it can only find layers in `Model` and `list` for now. 
+ ret = {} + + if isinstance(model, tf.keras.Model): + for key, value in model.__dict__.items(): + if isinstance(value, tf.keras.Model): + ret.update(_locate_layers(value, cur_path + [key])) + elif isinstance(value, list): + ret.update(_locate_layers(value, cur_path + [key])) + elif isinstance(value, tf.keras.layers.Layer): + ret[id(value)] = LayerInfo(value, cur_path + [key]) + + elif isinstance(model, list): + for i, item in enumerate(model): + if isinstance(item, tf.keras.Model): + ret.update(_locate_layers(item, cur_path + [i])) + elif isinstance(item, tf.keras.layers.Layer): + ret[id(item)] = LayerInfo(item, cur_path + [i]) + + else: + raise ValueError('Unexpected model type: {}'.format(type(model))) + return ret + +def _select_config(layer_info, config_list): + # Find the last matching config block for given layer. + # Returns None if the layer should not be compressed. + ret = None + for config in config_list: + if 'op_types' in config: + match = layer_info.type in config['op_types'] + match_default = 'default' in config['op_types'] and layer_info.type in default_layers.weighted_modules + if not match and not match_default: + continue + if 'op_names' in config and layer_info.name not in config['op_names']: + continue + ret = config + if ret is None or 'exclude' in ret: + return None + return ret + + +def _instrument_model(model, wrappers): + # Replace layers to wrappers + for wrapper in reversed(wrappers): + cur = model + for key in wrapper.layer_info.path[:-1]: + if isinstance(key, int): + cur = cur[key] + else: + cur = getattr(cur, key) + key = wrapper.layer_info.path[-1] + if isinstance(key, int): + cur[key] = wrapper + else: + setattr(cur, key, wrapper) diff --git a/src/sdk/pynni/nni/compression/tensorflow/default_layers.py b/src/sdk/pynni/nni/compression/tensorflow/default_layers.py index 2ecc46e3e3..0c729bd883 100644 --- a/src/sdk/pynni/nni/compression/tensorflow/default_layers.py +++ b/src/sdk/pynni/nni/compression/tensorflow/default_layers.py @@ 
-1,31 +1,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -from tensorflow import keras - -supported_layers = { - keras.layers.Conv1D: ('Conv1D', 0), - keras.layers.Conv2D: ('Conv2D', 0), - keras.layers.Conv2DTranspose: ('Conv2DTranspose', 0), - keras.layers.Conv3D: ('Conv3D', 0), - keras.layers.Conv3DTranspose: ('Conv3DTranspose', 0), - keras.layers.ConvLSTM2D: ('ConvLSTM2D', 0), - keras.layers.Dense: ('Dense', 0), - keras.layers.Embedding: ('Embedding', 0), - keras.layers.GRU: ('GRU', 0), - keras.layers.LSTM: ('LSTM', 0), -} - -default_layers = [x[0] for x in supported_layers.values()] - -def get_op_type(layer_type): - if layer_type in supported_layers: - return supported_layers[layer_type][0] - else: - return None - -def get_weight_index(op_type): - for k in supported_layers: - if supported_layers[k][0] == op_type: - return supported_layers[k][1] - return None +weighted_modules = [ + 'Conv1D', 'Conv2D', 'Conv3D', 'Conv1DTranspose', 'Conv2DTranspose', 'Conv3DTranspose', + 'Dense', + 'PReLU', + 'Embedding', +] diff --git a/src/sdk/pynni/nni/compression/tensorflow/pruning/__init__.py b/src/sdk/pynni/nni/compression/tensorflow/pruning/__init__.py new file mode 100644 index 0000000000..f8ac8ea9b9 --- /dev/null +++ b/src/sdk/pynni/nni/compression/tensorflow/pruning/__init__.py @@ -0,0 +1 @@ +from .one_shot import * diff --git a/src/sdk/pynni/nni/compression/tensorflow/pruning/one_shot.py b/src/sdk/pynni/nni/compression/tensorflow/pruning/one_shot.py new file mode 100644 index 0000000000..ace3d39e4e --- /dev/null +++ b/src/sdk/pynni/nni/compression/tensorflow/pruning/one_shot.py @@ -0,0 +1,67 @@ +import tensorflow as tf + +from ..compressor import Pruner + +__all__ = [ + 'OneshotPruner', + 'LevelPruner', +] + +class OneshotPruner(Pruner): + def __init__(self, model, config_list, pruning_algorithm='level', **algo_kwargs): + super().__init__(model, config_list) + self.set_wrappers_attribute('calculated', False) + self.masker = 
MASKER_DICT[pruning_algorithm](model, self, **algo_kwargs) + + def validate_config(self, model, config_list): + pass # TODO + + def calc_masks(self, wrapper, wrapper_idx=None): + if wrapper.calculated: + return None + sparsity = wrapper.config['sparsity'] + masks = self.masker.calc_masks(sparsity, wrapper, wrapper_idx) + if masks is not None: + wrapper.calculated = True + return masks + + +class LevelPruner(OneshotPruner): + def __init__(self, model, config_list): + super().__init__(model, config_list, pruning_algorithm='level') + + +class WeightMasker: + def __init__(self, model, pruner, **kwargs): + self.model = model + self.pruner = pruner + + def calc_masks(self, sparsity, wrapper, wrapper_idx=None): + raise NotImplementedError() + + +class LevelPrunerMasker(WeightMasker): + def calc_masks(self, sparsity, wrapper, wrapper_idx=None): + masks = {} + for weight_variable in wrapper.layer.weights: + if weight_variable.name.split('/')[-1].split(':')[0] == 'bias': # names look like `dense/bias:0` + continue + + k = int(tf.size(weight_variable).numpy() * sparsity) + if k == 0: + continue + + weight = weight_variable.read_value() + if wrapper.masks.get(weight_variable.name) is not None: + weight = tf.math.multiply(weight, wrapper.masks[weight_variable.name]) + + w_abs = tf.math.abs(weight) # keep the weight's shape so the mask matches it + threshold = -tf.math.top_k(-tf.reshape(w_abs, [-1]), k)[0][-1] # k-th smallest magnitude + mask = tf.math.greater(w_abs, threshold) + masks[weight_variable.name] = tf.cast(mask, weight.dtype) + return masks + + +MASKER_DICT = { + 'level': LevelPrunerMasker, +} diff --git a/src/sdk/pynni/tests/test_compressor.py b/src/sdk/pynni/tests/test_compressor_torch.py similarity index 87% rename from src/sdk/pynni/tests/test_compressor.py rename to src/sdk/pynni/tests/test_compressor_torch.py index 87afb5f23c..8d631da25a 100644 --- a/src/sdk/pynni/tests/test_compressor.py +++ b/src/sdk/pynni/tests/test_compressor_torch.py @@ -3,33 +3,12 @@ from unittest import TestCase, main import numpy as np -import tensorflow as tf import torch import torch.nn.functional as F import schema import 
nni.compression.torch as torch_compressor import math -if tf.__version__ >= '2.0': - import nni.compression.tensorflow as tf_compressor - - -def get_tf_model(): - model = tf.keras.models.Sequential([ - tf.keras.layers.Conv2D(filters=5, kernel_size=7, input_shape=[28, 28, 1], activation='relu', padding="SAME"), - tf.keras.layers.MaxPooling2D(pool_size=2), - tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu', padding="SAME"), - tf.keras.layers.MaxPooling2D(pool_size=2), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(units=128, activation='relu'), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(units=10, activation='softmax'), - ]) - model.compile(loss="sparse_categorical_crossentropy", - optimizer=tf.keras.optimizers.SGD(lr=1e-3), - metrics=["accuracy"]) - return model - class TorchModel(torch.nn.Module): def __init__(self): @@ -52,13 +31,6 @@ def forward(self, x): return F.log_softmax(x, dim=1) -def tf2(func): - def test_tf2_func(*args): - if tf.__version__ >= '2.0': - func(*args) - - return test_tf2_func - class CompressorTestCase(TestCase): def test_torch_quantizer_modules_detection(self): # test if modules can be detected @@ -92,11 +64,6 @@ def test_torch_level_pruner(self): configure_list = [{'sparsity': 0.8, 'op_types': ['default']}] torch_compressor.LevelPruner(model, configure_list, optimizer).compress() - @tf2 - def test_tf_level_pruner(self): - configure_list = [{'sparsity': 0.8, 'op_types': ['default']}] - tf_compressor.LevelPruner(get_tf_model(), configure_list).compress() - def test_torch_naive_quantizer(self): model = TorchModel() configure_list = [{ @@ -108,10 +75,6 @@ def test_torch_naive_quantizer(self): }] torch_compressor.NaiveQuantizer(model, configure_list).compress() - @tf2 - def test_tf_naive_quantizer(self): - tf_compressor.NaiveQuantizer(get_tf_model(), [{'op_types': ['default']}]).compress() - def test_torch_fpgm_pruner(self): """ With filters(kernels) weights defined as above (w), it is obvious that w[4] and 
w[5] is the Geometric Median @@ -141,23 +104,7 @@ def test_torch_fpgm_pruner(self): masks = pruner.calc_mask(model.conv2) assert all(torch.sum(masks['weight_mask'], (1, 2, 3)).numpy() == np.array([125., 125., 0., 0., 0., 0., 0., 0., 125., 125.])) - @tf2 - def test_tf_fpgm_pruner(self): - w = np.array([np.ones((5, 3, 3)) * (i+1) for i in range(10)]).astype(np.float32) - model = get_tf_model() - config_list = [{'sparsity': 0.2, 'op_types': ['Conv2D']}] - - pruner = tf_compressor.FPGMPruner(model, config_list) - weights = model.layers[2].weights - weights[0] = np.array(w).astype(np.float32).transpose([2, 3, 0, 1]).transpose([0, 1, 3, 2]) - model.layers[2].set_weights([weights[0], weights[1].numpy()]) - - layer = tf_compressor.compressor.LayerInfo(model.layers[2]) - masks = pruner.calc_mask(layer, config_list[0]).numpy() - masks = masks.reshape((-1, masks.shape[-1])).transpose([1, 0]) - - assert all(masks.sum((1)) == np.array([45., 45., 45., 45., 0., 0., 45., 45., 45., 45.])) - + def test_torch_l1filter_pruner(self): """ Filters with the minimum sum of the weights' L1 norm are pruned in this paper: