Various fixes and calibrations
jccaicedo committed Aug 19, 2020
1 parent: f323a1e · commit: da0a993
Showing 6 changed files with 69 additions and 69 deletions.
1 change: 1 addition & 0 deletions deepprofiler/dataset/sampling.py
@@ -25,6 +25,7 @@ def process_batch(self, batch):
for i in range(len(batch["keys"])):
batch["locations"][i]["Key"] = batch["keys"][i]
batch["locations"][i]["Target"] = batch["targets"][i][0]
batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]]
metadata = pd.concat(batch["locations"])
cols = ["Key","Target","Nuclei_Location_Center_X","Nuclei_Location_Center_Y"]
seps = ["+","@","x",".png"]
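For context, the new Class_Name column attaches a human-readable label to each sampled cell by indexing the first target's value list with the integer Target. A minimal sketch of the per-batch metadata assembled above; the column values and the Key string are invented for illustration:

import pandas as pd

# Hypothetical locations table for one image in the batch (values invented).
locations = pd.DataFrame({
    "Nuclei_Location_Center_X": [120.0, 482.5],
    "Nuclei_Location_Center_Y": [330.0, 97.25],
})
locations["Key"] = "week1/plate1/A01-1"      # batch["keys"][i]
locations["Target"] = 2                       # batch["targets"][i][0]
locations["Class_Name"] = "taxol"             # dset.targets[0].values[2]
metadata = pd.concat([locations])             # one frame per image, concatenated as above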
7 changes: 4 additions & 3 deletions deepprofiler/dataset/target.py
@@ -4,9 +4,10 @@ class MetadataColumnTarget(object):
def __init__(self, field_name, values):
self.field_name = field_name
self.index = {}
values.sort()
for i in range(len(values)):
self.index[values[i]] = i
self.values = values
self.values.sort()
for i in range(len(self.values)):
self.index[self.values[i]] = i
print(self.index)

def get_values(self, record):
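The constructor change above keeps the sorted class values on the object (self.values) instead of sorting the caller's list in place, so the integer index and the value list stay consistent in both directions. A toy illustration with invented class names:

values = ["geneB", "geneC", "geneA"]           # hypothetical metadata column values
values = sorted(values)                        # ["geneA", "geneB", "geneC"]
index = {v: i for i, v in enumerate(values)}   # {"geneA": 0, "geneB": 1, "geneC": 2}

# Forward lookup (used when encoding targets) and reverse lookup (used by sampling.py).
assert index["geneC"] == 2
assert values[index["geneC"]] == "geneC"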
52 changes: 10 additions & 42 deletions deepprofiler/imaging/cropping.py
@@ -21,8 +21,6 @@ def crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph, box_size, mask_boxes
mask_values = tf.ones_like(crops[:,:,:,-1], dtype=tf.float32) * tf.cast(mask_ind, dtype=tf.float32)
masks = tf.to_float( tf.equal(crops[:,:,:,-1], mask_values) )
crops = crops[:,:,:,0:-1] * tf.expand_dims(masks, -1)
#max_intensities = tf.reduce_max( tf.reduce_max( crops, axis=1, keepdims=True), axis=2, keepdims=True)
#crops = crops / (max_intensities + 1e-6)
mean = tf.math.reduce_mean(crops, axis=[1,2], keepdims=True)
std = tf.math.reduce_std(crops, axis=[1,2], keepdims=True)
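The removed lines were an alternative max-intensity scaling; the surviving code computes per-crop, per-channel mean and standard deviation with keepdims. A NumPy sketch of the standardization these statistics presumably feed (the lines that apply them are below the fold, so this is an assumption, not a copy of the elided code):

import numpy as np

def standardize_crops(crops, eps=1e-6):
    # crops: (n_crops, height, width, channels)
    mean = crops.mean(axis=(1, 2), keepdims=True)   # per-crop, per-channel mean
    std = crops.std(axis=(1, 2), keepdims=True)     # per-crop, per-channel std
    return (crops - mean) / (std + eps)             # assumed use of the statistics above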

@@ -40,6 +38,7 @@ def unfold_channels(crop, mode=0):
).astype(np.uint8)
return unfolded


def fold_channels(crop):
# Expected input image shape: (h, w * c), with h = w
# Output image shape: (h, w, c), with h = w
@@ -54,10 +53,9 @@ def fold_channels(crop):
# TODO: implement abstract crop generator
class CropGenerator(object):

def __init__(self, config, dset): #TODO: add mode="train"
def __init__(self, config, dset):
self.config = config
self.dset = dset
#TODO: add self.mode = mode

#################################################
## INPUT GRAPH DEFINITION
@@ -83,7 +81,7 @@ def build_input_graph(self):
imgs_shape = [None, img_height, img_width, img_channels]
batch_shape = (-1, img_height, img_width, img_channels)

# Inputs to the load data cache
# Inputs to cropping graph
image_ph = tf.placeholder(tf.float32, shape=imgs_shape, name="raw_images")
boxes_ph = tf.placeholder(tf.float32, shape=[None, 4], name="cell_boxes")
box_ind_ph = tf.placeholder(tf.int32, shape=[None], name="box_indicators")
@@ -118,6 +116,8 @@ def build_input_graph(self):
"batch": batch_shape
},
}

# Training variables
self.train_variables = {
"image_batch": self.input_variables["labeled_crops"][0],
"target_0": tf.one_hot(
@@ -127,30 +127,6 @@
}


#################################################
## AUGMENTATION GRAPH DEFINITION: Deprecated?
#################################################

def build_augmentation_graph(self):
num_targets = len(self.dset.targets)

# Outputs and cache of the data augmentation graph
augmented_op = deepprofiler.imaging.augmentations.augment_multiple(
tf.cast(self.input_variables["labeled_crops"][0], tf.float32),
self.config["train"]["model"]["params"]["batch_size"]
)
train_inputs = tf.tuple([augmented_op] + self.input_variables["labeled_crops"][1:])

self.train_variables = {
"image_batch":train_inputs[0],
}

for i in range(num_targets):
tname = "target_" + str(i)
tgt = self.dset.targets[i]
self.train_variables[tname] = tf.one_hot(train_inputs[i+1], tgt.shape[1])


#################################################
## START TRAINING QUEUES
#################################################
@@ -170,9 +146,6 @@ def data_loading_thread():
if len(batch["images"]) == 0: continue
images = np.reshape(batch["images"], self.input_variables["shapes"]["batch"])
boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config)
# Pre-crop augmentation: random zoom
#zoom = np.random.uniform(low=0.85, high=1.15, size=(boxes.shape[0],1))
#boxes = boxes * zoom

feed_dict = {
self.input_variables["image_ph"]:images,
@@ -184,11 +157,9 @@ def data_loading_thread():
tname = "target_" + str(i)
feed_dict[self.input_variables["targets_phs"][tname]] = targets[i]

# The cache reuses augmented examples :(
# How can we do augmentation on the GPU on the fly?
output = sess.run(self.train_variables, feed_dict)

# Find block of the pool to store data
# Find block in the pool to store data
lock.acquire()
first = self.pool_pointer
records = output["image_batch"].shape[0]
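The loader thread copies each processed batch into a fixed-size cache while holding the lock; the wrap-around logic itself is below the fold. The following is only a generic sketch of a circular cache write using the same array shapes, not the project's actual implementation:

import numpy as np

def write_to_pool(image_pool, label_pool, pool_pointer, images, labels):
    # Write a batch into a fixed-size circular cache and return the new pointer.
    size = image_pool.shape[0]
    idx = [(pool_pointer + k) % size for k in range(images.shape[0])]  # wrap around
    image_pool[idx, ...] = images
    label_pool[idx, ...] = labels
    return (pool_pointer + len(idx)) % size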
@@ -235,10 +206,11 @@ def start(self, session):
# Define input data batches
with tf.variable_scope("train_inputs"):
self.build_input_graph()
#self.build_augmentation_graph()
targets = [self.train_variables[t] for t in self.train_variables.keys() if t.startswith("target_")]

self.image_pool = np.zeros([self.config["train"]["sampling"]["cache_size"]] + list(self.input_variables["shapes"]["crops"][0]))
self.image_pool = np.zeros(
[self.config["train"]["sampling"]["cache_size"]] + list(self.input_variables["shapes"]["crops"][0])
)
self.label_pool = [np.zeros([self.config["train"]["sampling"]["cache_size"], t.shape[1]]) for t in targets]
self.pool_pointer = 0
self.ready_to_sample = False
@@ -255,7 +227,7 @@ def start(self, session):
def sample_batch(self, pool_index):
while not self.ready_to_sample:
time.sleep(2)
np.random.shuffle(pool_index) #TODO
np.random.shuffle(pool_index)
idx = pool_index[0:self.config["train"]["model"]["params"]["batch_size"]]
# TODO: make outputs for all targets
data = [self.image_pool[idx,...], self.label_pool[0][idx,:], 0]
@@ -269,12 +241,8 @@ def generate(self, sess, global_step=0):
break
data = self.sample_batch(pool_index)
# Indices of data => [0] images, [1:-1] targets, [-1] summary
#ms = data[-1]

global_step += 1
#if global_step % 10 == 0:
# self.summary_writer.add_summary(ms, global_step)

yield (data[0], data[1:-1])

def stop(self, session):
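For reference, unfold_channels and fold_channels (defined earlier in this file) convert between a multi-channel crop of shape (h, w, c) and the flat (h, w * c) image stored on disk, per their comments. A NumPy round-trip sketch under that assumption, ignoring the uint8 rescaling the real unfold_channels applies:

import numpy as np

h, w, c = 96, 96, 5                       # toy crop size and channel count
crop = np.random.rand(h, w, c).astype(np.float32)

# Unfold: place the c channels side by side -> (h, w * c)
flat = np.concatenate([crop[:, :, k] for k in range(c)], axis=1)

# Fold: split the flat image back into channels -> (h, w, c)
restored = np.stack(np.split(flat, c, axis=1), axis=-1)

assert np.allclose(restored, crop)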
2 changes: 1 addition & 1 deletion deepprofiler/learning/validation.py
@@ -32,5 +32,5 @@ def load_validation_data(config, dset, crop_generator, session):

validation = Validation(config, dset, crop_generator, session)
dset.scan(validation.process_batches, frame="val")
print("Validation data loaded")
print("Validation data loaded ")
return np.concatenate(validation.batch_inputs), np.concatenate(validation.batch_outputs)
48 changes: 35 additions & 13 deletions plugins/crop_generators/sampled_crop_generator.py
@@ -1,6 +1,7 @@
import os
import numpy as np
import pandas as pd
import skimage.io
import tensorflow as tf

import deepprofiler.imaging.cropping
@@ -16,45 +17,66 @@ class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator):

def __init__(self, config, dset):
super(GeneratorClass, self).__init__(config, dset)
self.datagen = tf.keras.preprocessing.image.ImageDataGenerator()
#self.datagen = tf.keras.preprocessing.image.ImageDataGenerator()
self.directory = config["paths"]["single_cell_sample"]
self.num_channels = len(config["dataset"]["images"]["channels"])
self.box_size = config["dataset"]["locations"]["box_size"]

self.box_size = self.config["dataset"]["locations"]["box_size"]
self.batch_size = self.config["train"]["model"]["params"]["batch_size"]


def start(self, session):
samples = pd.read_csv(os.path.join(self.directory, "sc-metadata.csv"))
self.num_classes = len(samples["Target"].unique())
self.samples = pd.read_csv(os.path.join(self.directory, "sc-metadata.csv"))
self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
self.num_classes = len(self.samples["Target"].unique())
'''
self.generator = self.datagen.flow_from_dataframe(
dataframe=samples,
x_col="Image_Name",
y_col="Target",
class_mode="raw",
y_col="Class_Name",
class_mode="categorical",
directory=self.directory,
color_mode="grayscale",
target_size=(self.box_size, self.box_size * self.num_channels),
batch_size=self.config["train"]["model"]["params"]["batch_size"]
)
'''

def generate(self, sess, global_step=0):
pointer = 0
while True:
#try:
x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
y = []
for i in range(self.batch_size):
if pointer >= len(self.samples):
self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
pointer = 0
filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
im = skimage.io.imread(filename).astype(np.float32)
x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
y.append(self.samples.loc[pointer, "Target"])
pointer += 1
yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
#except:
# break
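A sketch of how the new generator might be consumed; the config keys match those referenced above, but the surrounding objects and training loop are assumptions, not part of this commit:

# Hypothetical usage (config and dset are assumed to exist).
crop_generator = GeneratorClass(config, dset)
crop_generator.start(session=None)    # reads and shuffles sc-metadata.csv
batches = crop_generator.generate(sess=None)

x, y = next(batches)
# x: (batch_size, box_size, box_size, num_channels) array of folded crops
# y: (batch_size, num_classes) one-hot labels from tf.keras.utils.to_categorical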


def generate_old(self, sess, global_step=0):
while True:
try:
x_, y = next(self.generator)
x = np.zeros([x_.shape[0], self.box_size, self.box_size, self.num_channels])
for i in range(x_.shape[0]):
x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(x_[i])
yield (x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
yield (x, y) #tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
except:
break


def stop(self, session):
session.close()
return

## TODO: Next steps:
## 1. Fix the session closing error at the end (not a big deal for now, but better to fix it)
## 2. DONE => Integrate augmentations (this is important)
## 3. Reconsider the cache usage statistics and steps per epoch in ImageDataset

## Reusing the Single Image Crop Generator. No changes needed

SingleImageGeneratorClass = deepprofiler.imaging.cropping.SingleImageCropGenerator
28 changes: 18 additions & 10 deletions plugins/models/resnet.py
@@ -53,8 +53,7 @@ def define_model(self, config, dset):
input_shape = (
config["dataset"]["locations"]["box_size"], # height
config["dataset"]["locations"]["box_size"], # width
len(config["dataset"]["images"][
"channels"]) # channels
len(config["dataset"]["images"]["channels"]) # channels
)
input_image = keras.layers.Input(input_shape)
model = self.get_model(config, input_image=input_image)
@@ -80,13 +79,19 @@ def define_model(self, config, dset):
for layer in model.layers:
if hasattr(layer, "kernel_regularizer"):
setattr(layer, "kernel_regularizer", regularizer)
model = keras.models.model_from_json(model.to_json(), {'AugmentationLayer': AugmentationLayer})
optimizer = keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9, nesterov=True)
model = keras.models.model_from_json(
model.to_json(),
{'AugmentationLayer': AugmentationLayer}
)
optimizer = keras.optimizers.SGD(
lr=config["train"]["model"]["params"]["learning_rate"],
momentum=0.9,
nesterov=True
)

return model, optimizer, loss_func
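Note on the block above: assigning kernel_regularizer to layers of an already-built Keras model does not take effect until the model is rebuilt from its serialized config, which is why the code round-trips through model_from_json and passes the custom AugmentationLayer for deserialization. A condensed sketch of the same pattern using tf.keras (the weight copy at the end is optional and not part of the diff):

from tensorflow import keras

def add_l2_to_kernels(model, l2=1e-5, custom_objects=None):
    regularizer = keras.regularizers.l2(l2)
    for layer in model.layers:
        if hasattr(layer, "kernel_regularizer"):
            layer.kernel_regularizer = regularizer
    # The new regularizers only become active after rebuilding from the config.
    rebuilt = keras.models.model_from_json(model.to_json(), custom_objects or {})
    rebuilt.set_weights(model.get_weights())   # optional: keep the original weights
    return rebuilt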



## Support for ImageNet initialization
def copy_pretrained_weights(self):
base_model = self.get_model(self.config, weights="imagenet")
@@ -104,13 +109,16 @@ def copy_pretrained_weights(self):
available_channels = weights[0].shape[2]
target_shape = self.feature_model.layers[2 + lshift].weights[0].shape
new_weights = numpy.zeros(target_shape)

for i in range(new_weights.shape[2]):
j = i%available_channels
j = i % available_channels
new_weights[:,:,i,:] = weights[0][:,:,j,:]
weights_array = [new_weights]
if len(weights) > 1:
weights_array += weights[1:]
self.feature_model.layers[2 + lshift].set_weights(weights_array)

weights_array = [new_weights]
if len(weights) > 1:
weights_array += weights[1:]

self.feature_model.layers[2 + lshift].set_weights(weights_array)
print("Network initialized with pretrained ImageNet weights")

