microsoft · chicm-ms · Jun 24, 2020 · May 14, 2020 · May 14, 2020 · May 14, 2020
diff --git a/docs/en_US/Compressor/CompressionReference.md b/docs/en_US/Compressor/CompressionReference.md
@@ -18,6 +18,16 @@
 ..  autoclass:: nni.compression.torch.utils.shape_dependency.ChannelDependency
     :members:
 
-..  autoclass:: nni.compression.torch.utils.mask_conflict.MaskConflict
+..  autoclass:: nni.compression.torch.utils.shape_dependency.GroupDependency
     :members:
+
+..  autoclass:: nni.compression.torch.utils.mask_conflict.CatMaskPadding
+    :members:
+
+..  autoclass:: nni.compression.torch.utils.mask_conflict.GroupMaskConflict
+    :members:
+
+..  autoclass:: nni.compression.torch.utils.mask_conflict.ChannelMaskConflict
+    :members:
+
 ```
diff --git a/docs/en_US/Compressor/CompressionUtils.md b/docs/en_US/Compressor/CompressionUtils.md
@@ -116,8 +116,6 @@ Set 12,layer4.1.conv1
 When the masks of different layers in a model have conflict (for example, assigning different sparsities for the layers that have channel dependency), we can fix the mask conflict by MaskConflict. Specifically, the MaskConflict loads the masks exported by the pruners(L1FilterPruner, etc), and check if there is mask conflict, if so, MaskConflict sets the conflicting masks to the same value.
 
 ```
-from nni.compression.torch.utils.mask_conflict import MaskConflict
-mc = MaskConflict('./resnet18_mask', net, data)
-mc.fix_mask_conflict()
-mc.export('./resnet18_fixed_mask')
+from nni.compression.torch.utils.mask_conflict import fix_mask_conflict
+fixed_mask = fix_mask_conflict('./resnet18_mask', net, data)
 ```
diff --git a/src/sdk/pynni/nni/_graph_utils.py b/src/sdk/pynni/nni/_graph_utils.py
@@ -10,6 +10,7 @@
 from torch.utils.tensorboard._pytorch_graph import NodePy, NodePyIO, NodePyOP, GraphPy
 CLASSTYPE_KIND = 'ClassType'
 GETATTR_KIND = 'prim::GetAttr'
+CAT_KIND = 'aten::cat'
 
 _logger = logging.getLogger(__name__)
 
@@ -236,6 +237,7 @@ def __init__(self, model=None, dummy_input=None, traced_model=None):
         super().__init__(model, dummy_input, traced_model)
         self.global_count = 0
         self.name_to_node, self.input_to_node, self.output_to_node = self._build_graph()
+        self._extract_auxiliary_info()
 
     def _expand_non_prim_node(self, node, nodes, input_to_node, output_to_node,
                               module_type):
@@ -364,6 +366,58 @@ def _expand_module_node(self, node, node_name, unique_name, op_type, nodes,
                              node_group, inputs=inputs, outputs=outputs)
         return nodepy
 
+    def _extract_cat_info(self, node_group, cpp_node):
+        """
+        Extract the detailed information of the cat operation,
+        such as the order of the input tensors, the shape of each
+        input tensor, the output shape, and the cat dimension.
+
+        Parameters
+        ----------
+        node_group : NodePyGroup
+        cpp_node: torch._C.Node
+            It should be an ```aten::cat``` node
+
+        Returns
+        -------
+        dict
+            Auxiliary information for the cat operation.
+            This dict object has four keys: 'cat_dim', 'out_shape',
+            'in_order' and 'in_shape'. cat_dim is the dimension along
+            which the input tensors are concatenated. out_shape
+            is the shape of the output tensor of the cat operation.
+            in_order is an ordered list of the unique names of the
+            nodes that produce the input tensors. in_shape is also
+            an ordered list that contains the shapes of the input
+            tensors.
+        """
+        # only support the cat operation
+        assert cpp_node.kind() == CAT_KIND
+        cat_info = {}
+        # get the shape of the output tensor
+        t_output = cpp_node.output()
+        out_shape = t_output.type().sizes()
+        cat_info['out_shape'] = out_shape
+        # get the cat dimension (the second input of the cat node)
+        inputs = cpp_node.inputs()
+        cat_dim = list(inputs)[1].toIValue()
+        cat_info['cat_dim'] = cat_dim
+        # get the order of the input tensors
+        # To get the order of the input tensors, we need
+        # to be aware of the topology of the model, which
+        # means this lookup relies on self.output_to_node
+        # and must run after the build_index function.
+        input_order = []
+        list_construct_cpp = list(cpp_node.inputs())[0].node()
+        input_tensors = list(list_construct_cpp.inputs())
+        for _tensor in input_tensors:
+            debug_name = _tensor.debugName()
+            input_order.append(self.output_to_node[debug_name].unique_name)
+        cat_info['in_order'] = input_order
+        input_shapes = [t.type().sizes() for t in input_tensors]
+        cat_info['in_shape'] = input_shapes
+        return cat_info
+
     def _extract_shape_info(self, node):
         """
         Extract the shape information of ```aten::view``` node
@@ -541,8 +595,8 @@ def _build_graph(self):
                     node, nodes, input_to_node, output_to_node, 'func')
                 nodes_py.nodes_op.append(node_group)
                 # get shape infor for view (aten::view) func
-                if node_group.op_type in ['aten::view', 'aten::flatten']:
-                    node_group.auxiliary = self._extract_shape_info(node)
+                # if node_group.op_type in ['aten::view', 'aten::flatten']:
+                #     node_group.auxiliary = self._extract_shape_info(node)
 
         for node in graph.outputs():  # Create sink nodes for output ops
             node_py = NodePyIO(node, 'output')
@@ -552,6 +606,26 @@ def _build_graph(self):
         # build index
         return self._build_index(self.nodes_py.nodes_op)
 
+    def _extract_auxiliary_info(self):
+        """
+        Extract the auxiliary information for the node groups
+        where needed: shape info for view/flatten/mean/reshape
+        operations and concat details for cat operations.
+        """
+        # the result is stored in the `auxiliary` attribute of each node group
+        for node_group in self.nodes_py.nodes_op:
+            if node_group.op_type in ['aten::view', 'aten::flatten', 'aten::mean', 'aten::reshape']:
+                # use the first cpp node whose kind matches the op type to get the shape info
+                cpp_node = list(filter(lambda x: x.kind() == node_group.op_type,
+                                       node_group.node_cpps))[0]
+                node_group.auxiliary = self._extract_shape_info(cpp_node)
+            elif node_group.op_type == CAT_KIND:
+                # get the detailed information for the cat func
+                cpp_node = list(filter(lambda x: x.kind() == node_group.op_type,
+                                       node_group.node_cpps))[0]
+                node_group.auxiliary = self._extract_cat_info(
+                    node_group, cpp_node)
+
     def find_predecessors(self, unique_name):
         """
         Find predecessor node of the given node

diff --git a/src/sdk/pynni/nni/compression/torch/speedup/compress_modules.py b/src/sdk/pynni/nni/compression/torch/speedup/compress_modules.py
@@ -14,7 +14,11 @@
     'AvgPool2d': lambda module, mask: no_replace(module, mask),
     'AdaptiveAvgPool2d': lambda module, mask: no_replace(module, mask),
     'ReLU': lambda module, mask: no_replace(module, mask),
-    'Linear': lambda module, mask: replace_linear(module, mask)
+    'ReLU6': lambda module, mask: no_replace(module, mask),
+    'Linear': lambda module, mask: replace_linear(module, mask),
+    'Dropout': lambda module, mask: no_replace(module, mask),
+    'Dropout2d': lambda module, mask: no_replace(module, mask),
+    'Dropout3d': lambda module, mask: no_replace(module, mask)
 }
 
 def no_replace(module, mask):
@@ -111,28 +115,48 @@ def replace_conv2d(conv, mask):
     else:
         out_channels_index = mask.output_mask.mask_index[1]
         out_channels = out_channels_index.size()[0]
+
     _logger.debug("replace conv2d with in_channels: %d, out_channels: %d", in_channels, out_channels)
     new_conv = torch.nn.Conv2d(in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=conv.kernel_size,
                                stride=conv.stride,
                                padding=conv.padding,
                                dilation=conv.dilation,
-                               groups=1, # currently only support groups is 1
+                               groups=conv.groups,
                                bias=conv.bias is not None,
                                padding_mode=conv.padding_mode)
+
     new_conv.to(conv.weight.device)
     tmp_weight_data = tmp_bias_data = None
+
     if mask.output_mask is not None:
         tmp_weight_data = torch.index_select(conv.weight.data, 0, out_channels_index)
         if conv.bias is not None:
             tmp_bias_data = torch.index_select(conv.bias.data, 0, out_channels_index)
-    # NOTE: does not support group
+    else:
+        tmp_weight_data = conv.weight.data
+    # For the convolutional layers that have more than one group
+    # we need to copy the weight group by group, because the input
+    # channels are also divided into several groups and each group
+    # of filters may have different input channel indexes.
+    input_step = int(conv.in_channels / conv.groups)
+    filter_step = int(out_channels / conv.groups)
     if mask.input_mask is not None:
-        tmp_weight_data = torch.index_select(conv.weight.data if tmp_weight_data is None else tmp_weight_data,
-                                             1, in_channels_index)
-    assert tmp_weight_data is not None, "Conv2d weight should be updated based on masks"
-    new_conv.weight.data.copy_(tmp_weight_data)
+        for groupid in range(conv.groups):
+            start = groupid * input_step
+            end = (groupid + 1) * input_step
+            current_input_index = list(filter(lambda x: start <= x and x < end, in_channels_index.tolist()))
+            # shift the global index into the group index
+            current_input_index = [x-start for x in current_input_index]
+            current_input_index = torch.tensor(current_input_index).to(tmp_weight_data.device) # pylint: disable=not-callable
+            f_start = groupid * filter_step
+            f_end = (groupid + 1) * filter_step
+            new_conv.weight.data[f_start:f_end] = torch.index_select(tmp_weight_data[f_start:f_end], 1, current_input_index)
+    else:
+        new_conv.weight.data.copy_(tmp_weight_data)
+
     if conv.bias is not None:
         new_conv.bias.data.copy_(conv.bias.data if tmp_bias_data is None else tmp_bias_data)
+
     return new_conv
diff --git a/src/sdk/pynni/nni/compression/torch/speedup/compressor.py b/src/sdk/pynni/nni/compression/torch/speedup/compressor.py
@@ -4,6 +4,7 @@
 import logging
 import torch
 from nni._graph_utils import build_module_graph
+from nni.compression.torch.utils.mask_conflict import fix_mask_conflict
 from .compress_modules import replace_module
 from .infer_shape import ModuleMasks, infer_from_mask, infer_from_inshape, infer_from_outshape
 
@@ -53,9 +54,10 @@ def __init__(self, model, dummy_input, masks_file, map_location=None):
         self.bound_model = model
         self.masks = torch.load(masks_file, map_location)
         self.inferred_masks = dict() # key: module_name, value: ModuleMasks
+        self.dummy_input = dummy_input
         self.torch_graph = build_module_graph(model, dummy_input)
 
-    def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=None):
+    def infer_module_mask(self, module_name, last_module, mask=None, in_shape=None, out_shape=None):
         """
         Infer input shape / output shape based on the module's weight mask / input shape / output shape.
 
@@ -71,6 +73,8 @@ def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=Non
         ----------
         module_name : str
             The name of the node
+        last_module : str
+            The name of last visited node
         mask : tensor of mask or ModuleMasks
             Mask of the weights in this node (i.e., module)
         in_shape : ModuleMasks
@@ -100,10 +104,17 @@ def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=Non
                 raise RuntimeError(
                     "Has not supported infering output shape from input shape for module/function: `{}`, {}"
                     .format(m_type, module_name))
-            if m_type in ['aten::view', 'aten::flatten']:
+            if m_type in ['aten::view', 'aten::flatten', 'aten::mean', 'aten::reshape']:
                 output_cmask = infer_from_inshape[m_type](module_masks,
                                                           in_shape,
                                                           self.torch_graph.name_to_node[module_name].auxiliary)
+            elif m_type in ['aten::cat']:
+                # To calculate the mask for the concat operation, we need the output
+                # shape, the cat dimension, and the order of the input parameters.
+                output_cmask = infer_from_inshape[m_type](module_masks,
+                                                          in_shape,
+                                                          self.torch_graph.name_to_node[module_name].auxiliary,
+                                                          last_module)
             else:
                 output_cmask = infer_from_inshape[m_type](module_masks, in_shape)
         if out_shape is not None:
@@ -117,18 +128,19 @@ def infer_module_mask(self, module_name, mask=None, in_shape=None, out_shape=Non
         if input_cmask:
             predecessors = self.torch_graph.find_predecessors(module_name)
             for _module_name in predecessors:
-                self.infer_module_mask(_module_name, out_shape=input_cmask)
+                self.infer_module_mask(_module_name, module_name, out_shape=input_cmask)
         if output_cmask:
             successors = self.torch_graph.find_successors(module_name)
             for _module_name in successors:
-                self.infer_module_mask(_module_name, in_shape=output_cmask)
+                self.infer_module_mask(_module_name, module_name, in_shape=output_cmask)
 
     def infer_modules_masks(self):
         """
         Do shape inference of involved modules, including the shape of weights, inputs, output
         """
         for module_name, mask in self.masks.items():
-            self.infer_module_mask(module_name, mask=mask)
+            _logger.debug('Start mask inference from %s', module_name)
+            self.infer_module_mask(module_name, None, mask=mask)
 
     def replace_compressed_modules(self):
         """
@@ -144,19 +156,20 @@ def replace_compressed_modules(self):
             _logger.debug("replace %s, in %s type, with op_type %s",
                           module_name, g_node.type, g_node.op_type)
             if g_node.type == 'module':
-                super_module, leaf_module = get_module_by_name(self.bound_model, module_name)
+                super_module, leaf_module = get_module_by_name(self.bound_model, g_node.name)
                 m_type = g_node.op_type
                 if not m_type in replace_module:
                     raise RuntimeError("Has not supported replacing the module: `{}`".format(m_type))
-                _logger.info("replace module (name: %s, op_type: %s)", module_name, m_type)
+                _logger.info("replace module (name: %s, op_type: %s)", g_node.name, m_type)
                 compressed_module = replace_module[m_type](leaf_module, self.inferred_masks[module_name])
-                setattr(super_module, module_name.split('.')[-1], compressed_module)
+                setattr(super_module, g_node.name.split('.')[-1], compressed_module)
             elif g_node.type == 'func':
                 _logger.info("Warning: cannot replace (name: %s, op_type: %s) which is func type",
                              module_name, g_node.op_type)
             else:
                 raise RuntimeError("Unsupported node type: {}".format(g_node.type))
 
+
     def speedup_model(self):
         """
         There are basically two steps:
@@ -165,6 +178,8 @@ def speedup_model(self):
         """
         training = self.bound_model.training
         _logger.info("start to speed up the model")
+        _logger.info("fix the mask conflict of the interdependent layers")
+        fix_mask_conflict(self.masks, self.bound_model, self.dummy_input)
         _logger.info("infer module masks...")
         self.infer_modules_masks()
         _logger.info("replace compressed modules...")