kubeflow · k8s-ci-robot · May 9, 2019 · Mar 12, 2019 · Apr 24, 2019 · May 1, 2019
diff --git a/examples/NAS-Envelopenet-trainingcontainer/Dockerfile b/examples/NAS-Envelopenet-trainingcontainer/Dockerfile
@@ -0,0 +1,32 @@
+ARG cuda_version=9.0
+ARG cudnn_version=7
+FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel
+
+# Install system packages (compilers, Python 3.5 + pip, HDF5/OpenMPI/graphviz), then drop the apt lists
+RUN apt-get update && apt-get install -y software-properties-common && \
+      add-apt-repository ppa:deadsnakes/ppa && \
+      apt-get update && \
+      apt-get install -y --no-install-recommends \
+      bzip2 \
+      g++ \
+      git \
+      graphviz \
+      libgl1-mesa-glx \
+      libhdf5-dev \
+      openmpi-bin \
+      python3.5 \
+      python3-pip \
+      python3-setuptools \
+      python3-dev \
+      wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy the trial code into the image (plain local copy, so COPY rather than ADD).
+COPY . /app
+WORKDIR /app
+# pip 21.0 dropped Python 3.5 support, so cap the self-upgrade below it.
+RUN pip3 install --upgrade "pip<21.0"
+RUN pip3 install --no-cache-dir -r requirements.txt
+ENV PYTHONPATH=/app
+
+ENTRYPOINT ["python3.5", "-u", "run_trial.py"]
diff --git a/examples/NAS-Envelopenet-trainingcontainer/README.md b/examples/NAS-Envelopenet-trainingcontainer/README.md
@@ -0,0 +1,18 @@
+# About the training container
+
+The algorithm follows the idea proposed in *Fast Neural Architecture Construction using EnvelopeNets* by Kamath et al. (https://arxiv.org/pdf/1803.06744.pdf). It is not a reinforcement-learning or evolution-based NAS method,
+but rather a method for constructing deep networks.
+
+# How this code works
+
+First, the YAML file is parsed using Operation.py and suggestion_param.py. Then the suggestion in nasenvelopenet_service.py calls nac_gen.py to generate the initial architecture and passes it to run_trial.py.
+run_trial.py is the entrypoint and is called from the suggestion. It invokes the Model Constructor, which constructs the model. The algorithm parameter max_iterations sets the maximum number of restructuring
+iterations of the model. When this is reached, it evaluates the model.
+Based on this, the suggestion calls generate_arch.py to improve the architecture using the collected metrics, and this loop runs until max_iterations is reached.
+
+The Model Constructor uses various methods from net.py to build the model; net.py in turn uses cell_init.py, cell_main.py and cell_classification.py as the definitions of the initial cell, the envelope cell and the classification cell used
+to build the model. cifar10_input.py provides various methods needed for the CIFAR-10 dataset. Evaluate.py has various methods for testing.
+
+# How to run this code
+
+I have attached a test script, test.py, which I used to parse the YAML file and run this code locally. However, the code has changed since I tested it on Katib, so you might need to adjust something.
diff --git a/examples/NAS-Envelopenet-trainingcontainer/cell.py b/examples/NAS-Envelopenet-trainingcontainer/cell.py
@@ -0,0 +1,10 @@
+"""Base cell."""
+
+import tensorflow as tf
+class Cell:
+    """Base cell: All cells derived from this class."""
+    def __init__(self):
+        pass
+    def get_params(self):
+        """Get tf params"""
+        print([tensor.name for tensor in tf.get_default_graph().as_graph_def().node])
diff --git a/examples/NAS-Envelopenet-trainingcontainer/cell_classification.py b/examples/NAS-Envelopenet-trainingcontainer/cell_classification.py
@@ -0,0 +1,52 @@
+"""Classification cell"""
+
+import tensorflow as tf
+from cell import Cell
+
+slim = tf.contrib.slim
+
+def trunc_normal(stddev):
+    return tf.truncated_normal_initializer(0.0, stddev)
+
+class Classification(Cell):
+    """Classification cell: The final classification block of a CNN"""
+    def __init__(self):
+        self.cellname = "Classification"
+        Cell.__init__(self)
+
+    def cell(self, inputs, arch, is_training):
+        """Create the cell by instantiating the cell blocks"""
+        nscope = 'Cell_{}'.format(self.cellname)
+        net = inputs
+        reuse = None
+        with tf.variable_scope(nscope, 'classification_block', [inputs], reuse=reuse) as scope:
+            for layer in sorted(arch.keys()):
+                for branch in sorted(arch[layer].keys()):
+                    block = arch[layer][branch]
+                    if block["block"] == "reduce_mean":
+                        net = tf.reduce_mean(net, [1, 2])
+                    elif block["block"] == "flatten":
+                        net = slim.flatten(net)
+                    elif block["block"] == "fc":
+                        outputs = block["outputs"]
+                        net = slim.fully_connected(net, outputs)
+                    elif block["block"] == "fc-final":
+                        outputs = block["outputs"]
+                        inputs = block["inputs"]
+                        weights_initializer = trunc_normal(1 / float(inputs))
+                        biases_initializer = tf.zeros_initializer()
+                        net = slim.fully_connected(
+                            net,
+                            outputs,
+                            biases_initializer=biases_initializer,
+                            weights_initializer=weights_initializer,
+                            weights_regularizer=None,
+                            activation_fn=None)
+                    elif block["block"] == "dropout":
+                        keep_prob = block["keep_prob"]
+                        net = slim.dropout(
+                            net, keep_prob=keep_prob, is_training=is_training)
+                    else:
+                        print("Invalid block")
+                        exit(-1)
+        return net
diff --git a/examples/NAS-Envelopenet-trainingcontainer/cell_init.py b/examples/NAS-Envelopenet-trainingcontainer/cell_init.py
@@ -0,0 +1,76 @@
+"""Initialization (Stem) cell"""
+import tensorflow as tf
+from cell import Cell
+
+slim = tf.contrib.slim
+
+def trunc_normal(stddev):
+    return tf.truncated_normal_initializer(0.0, stddev)
+
+class Init(Cell):
+    """Initialization (Stem) cell: The first cell of a CNN"""
+    def __init__(self, cellidx):
+        self.cellidx = cellidx
+        self.cellname = "Init"
+        Cell.__init__(self)
+
+    def cell(self, inputs, arch, is_training):
+        """Create the cell by instantiating the cell blocks"""
+        nscope = "Cell_{}_{}".format(self.cellname,self.cellidx)
+        reuse = None
+        with tf.variable_scope(nscope, 'initial_block', [inputs], reuse=reuse) as scope:
+            with slim.arg_scope([slim.conv2d, slim.max_pool2d], stride=1, padding='SAME'):
+                net = inputs
+                for layer in sorted(arch.keys()):
+                    cells = []
+                    for branch in sorted(arch[layer].keys()):
+                        block = arch[layer][branch]
+                        if block["block"] == "conv2d":
+                            output_filters = int(block["outputs"])
+                            kernel_size = block["kernel_size"]
+                            if "stride" not in block.keys():
+                                stride = 1
+                            else:
+                                stride = block["stride"]
+                            cell = slim.conv2d(
+                                net,
+                                output_filters,
+                                kernel_size,
+                                stride=stride,
+                                padding='SAME') 
+                        elif block["block"] == "max_pool":
+                            kernel_size = block["kernel_size"]
+                            cell = slim.max_pool2d(
+                                net, kernel_size, padding='SAME', stride=2)
+                        elif block["block"] == "lrn":
+                            cell = tf.nn.lrn(
+                                net, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
+                        elif block["block"] == "dropout":
+                            keep_prob = block["keep_prob"]
+                            cell = slim.dropout(net, keep_prob=keep_prob, is_training=is_training)
+                        elif block["block"] == "cutout":
+                            if not is_training:
+                              cell = net
+                            else:
+                              cutout_size = block["size"]
+                              img_dim = int(net.shape[1])
+                              batch_size = int(net.shape[0])
+                              channels = int(net.shape[3])
+                              """ Puts white rectange on a RGB image with random x,y coordinates """
+                              mask = tf.ones([cutout_size, cutout_size], dtype=tf.int32)
+                              start = tf.random_uniform([2], minval=0, maxval=img_dim, dtype=tf.int32)
+                              mask = tf.pad(mask, [[cutout_size + start[0], img_dim - start[0]],
+                                                   [cutout_size + start[1], img_dim - start[1]]])
+                              mask = mask[cutout_size: cutout_size + img_dim,
+                                          cutout_size: cutout_size + img_dim]
+                              mask = tf.reshape(mask, [img_dim, img_dim, 1])
+                              mask = tf.expand_dims(mask, axis=0)
+                              mask = tf.tile(mask, [batch_size, 1, 1, channels])
+                              cell = tf.where(tf.equal(mask, 0), x=net, y=tf.zeros_like(net))
+                        else:
+                            print("Invalid block")
+                            exit(-1)
+                        cells.append(cell)
+                    net = tf.concat(cells, axis=-1)
+
+        return net
diff --git a/examples/NAS-Envelopenet-trainingcontainer/cell_main.py b/examples/NAS-Envelopenet-trainingcontainer/cell_main.py
@@ -0,0 +1,115 @@
+""" Envelope Cell"""
+import tensorflow as tf
+from cell import Cell
+
+slim = tf.contrib.slim
+
+class CellEnvelope(Cell):
+    """Envelope cell: parallel conv branches whose outputs are concatenated."""
+    def __init__(
+            self,
+            cellidx,
+            channelwidth,
+            net,
+            filters,
+            log_stats,
+            outputs):
+        """Record static dims of `net`; with log_stats, create per-branch stat variables."""
+        self.cellidx = cellidx
+        self.log_stats = log_stats
+        self.cellname = "Envelope"
+        self.numbins = 100  # not referenced anywhere else in this class
+        self.batchsize = int(net.shape[0])
+        self.output_per_filter = outputs
+        img_dims = int(net.shape[1])
+        self.imagesize = [img_dims, img_dims]  # shape[1] is used for both spatial dims
+        Cell.__init__(self)
+        scope = 'Cell{}'.format(self.cellidx)
+        if self.log_stats:
+            with tf.variable_scope(scope, reuse=False):
+                for branch in filters:
+                    with tf.variable_scope(branch, reuse=False):
+                        self.init_stats()
+
+    def cell(self, inputs, channelwidth, is_training=True, filters=None):
+        """Run `inputs` through one conv branch per name in `filters`.
+
+        Names ending in "sep" get a separable conv, others a plain conv; each
+        output gets dropout, then all are concatenated on axis 3.
+        """
+        dropout_keep_prob = 0.8
+        nscope = 'Cell_{}_{}'.format(self.cellname,self.cellidx)  # not used below
+        scope = 'Cell{}'.format(self.cellidx)
+        nets = []
+        with tf.variable_scope(scope):
+            for branch in sorted(filters):
+                with tf.variable_scope(branch):
+                    conv_h, conv_w = branch[0], branch[0]  # NOTE(review): first char of the name, a str -- confirm slim accepts it
+                    outchannels = self.output_per_filter
+                    if branch.endswith("sep"):
+                        net = slim.separable_conv2d(
+                            inputs, outchannels, [
+                                conv_h, conv_w], 1, normalizer_fn=slim.batch_norm)
+                    else:
+                        net = slim.conv2d(
+                            inputs, outchannels, [
+                                conv_h, conv_w], normalizer_fn=slim.batch_norm)
+                if self.log_stats:  # outside the branch scope; calc_stats re-enters it with reuse=True
+                    msss = self.calc_stats(net, branch)
+                    net = tf.Print(
+                        net,
+                        [msss],
+                        message="MeanSSS=:{}/{}:".format(scope, branch))
+                net = slim.dropout(
+                    net,
+                    keep_prob=dropout_keep_prob,
+                    scope='dropout',
+                    is_training=is_training)
+                nets.append(net)
+            net = tf.concat(axis=3, values=nets)
+        return net
+
+    def init_stats(self):
+        """Create the zero-initialised statistics variables; nothing is returned."""
+        size = [
+            self.batchsize,
+            self.imagesize[0],
+            self.imagesize[1],
+            self.output_per_filter]
+        sumsquaredsamples = tf.contrib.framework.model_variable(
+            "sumsquaredsamples", size, initializer=tf.zeros_initializer)
+        sumsamples = tf.contrib.framework.model_variable(
+            "sumsamples", size, initializer=tf.zeros_initializer)
+        samplecount = tf.contrib.framework.model_variable(
+            "samplecount", [1], initializer=tf.zeros_initializer)
+
+    def calc_stats(self, inputs, scope):
+        """Update the stats variables under `scope`; return mean(sumsquaredsamples / samplecount)."""
+        with tf.variable_scope(scope, reuse=True):
+            size = [
+                self.batchsize,
+                self.imagesize[0],
+                self.imagesize[1],
+                self.output_per_filter]
+            sumsquaredsamples = tf.get_variable("sumsquaredsamples", size)
+            sumsamples = tf.get_variable("sumsamples", size)
+            samplecount = tf.get_variable("samplecount", [1])
+            tsamplecount = tf.add(samplecount, tf.to_float(tf.constant(1)))  # count goes up by 1 per call, not per image
+            samplecount = samplecount.assign(tsamplecount)
+
+            """ input is N*H*W*C. We need to calcualte running variance over 
+            time (i.e over the N Images in this batch and in all batches.
+             Hence need to reduce across the N dimension """
+            sum_across_batch = tf.reduce_sum(inputs, axis=0)
+            tsumsamples = tf.add(sumsamples, sum_across_batch)
+            sumsamples = sumsamples.assign(tsumsamples)  # NOTE(review): result is not referenced afterwards -- confirm this update ever runs
+            squared_inputs = tf.square(inputs)
+            squared_sum_across_batch = tf.reduce_sum(squared_inputs, axis=0)
+            tsumsquaredsamples = tf.add(
+                sumsquaredsamples, squared_sum_across_batch)
+            sumsquaredsamples = sumsquaredsamples.assign(tsumsquaredsamples)
+            msss = (1 / samplecount) * (sumsquaredsamples)
+            msss = tf.reduce_mean(msss)
+
+            return msss
+