Adding checkpoints and evaluation notebook

prachigarg23 · Jan 23, 2022 · f92fd99 · f92fd99
1 parent d2bd673
commit f92fd99
Show file tree

Hide file tree

Showing 8 changed files with 1,634 additions and 97 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/Evaluation_Notebook.ipynb b/Evaluation_Notebook.ipynb
diff --git a/Plot_Tsne_Notebook.ipynb b/Plot_Tsne_Notebook.ipynb
diff --git a/README.md b/README.md
@@ -3,6 +3,8 @@ This is the Pytorch implementation of our work "Multi-Domain Incremental Learnin
 
 Full paper: http://arxiv.org/abs/2110.12205
 
+**Model checkpoints and evaluation notebook now out for easy reproducibility!**
+
 ![image](final-main-diagram-wacv1.png)
 
 ## Requirements
@@ -42,17 +44,21 @@ _Step 3: Learn CS|BDD model on IDD_ \
 Training commands for the Fine-tuning model, Multi-task (joint, offline) model and Single-task (independent models) can be found in the bash scripts inside `trainer_files` directory. Other ablation experiment files can be requested.
 
 ### Pretrained Models
-coming soon
+Our checkpoints for (1) Proposed model, (2) Fine-tuning, and (3) Single-Task baselines on ERFNet for CS, BDD and IDD can be found [here](https://drive.google.com/drive/folders/1RrWlm4hismw9srbQJ-Q_-cs_dHUwISqq?usp=sharing). Checkpoints for other settings (like BDD->CS or IDD->BDD) can be released if required.
+
 #### Testing
-#### Tensorboard use
+Refer to the Jupyter notebook `Evaluation_Notebook.ipynb` to evaluate our models. Make sure to set suitable paths for the dataset, models and checkpoints.
+
 #### T-SNE plots for segmentation
+Refer to `Plot_Tsne_Notebook.ipynb` for t-SNE plots. We plot the output of the encoder before and after step 2, comparing fine-tuning against our method.
 
 ## Citation
-`@article{garg2021multi,
+`@inproceedings{garg2022multi,
   title={Multi-Domain Incremental Learning for Semantic Segmentation},
   author={Garg, Prachi and Saluja, Rohit and Balasubramanian, Vineeth N and Arora, Chetan and Subramanian, Anbumani and Jawahar, CV},
-  journal={arXiv preprint arXiv:2110.12205},
-  year={2021}
+  booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
+  pages={761--771},
+  year={2022}
 }
 `
 

diff --git a/dataset_custom.py b/dataset_custom.py
@@ -0,0 +1,209 @@
+import numpy as np
+import os
+
+from PIL import Image
+
+from torch.utils.data import Dataset
+
+# Filename suffixes that is_image() accepts as image files.
+EXTENSIONS = ['.jpg', '.png']
+
+
+def load_image(file):
+    """Open ``file`` with PIL's ``Image.open`` and return the resulting image."""
+    image = Image.open(file)
+    return image
+
+
+def is_image(filename):
+    """Return True when ``filename`` ends with one of the suffixes in EXTENSIONS."""
+    for suffix in EXTENSIONS:
+        if filename.endswith(suffix):
+            return True
+    return False
+
+
+def is_label_city(filename):
+    """Return True when ``filename`` ends with ``_labelTrainIds.png``.
+
+    This is the label-file suffix the ``cityscapes`` dataset class filters on.
+    """
+    label_suffix = "_labelTrainIds.png"
+    return filename.endswith(label_suffix)
+
+
+def is_label_IDD(filename):
+    """Return True when ``filename`` ends with ``_labellevel3Ids.png``.
+
+    This is the label-file suffix the ``IDD`` dataset class filters on.
+    """
+    label_suffix = "_labellevel3Ids.png"
+    return filename.endswith(label_suffix)
+
+
+def is_label_BDD(filename):
+    """Return True when ``filename`` ends with ``_train_id.png``.
+
+    This is the label-file suffix the ``BDD`` dataset class filters on.
+    """
+    label_suffix = "_train_id.png"
+    return filename.endswith(label_suffix)
+
+
+def image_path(root, basename, extension):
+    """Build ``<root>/<basename><extension>``.
+
+    ``extension`` is appended verbatim, so it must carry its own leading dot.
+    """
+    filename = '{}{}'.format(basename, extension)
+    return os.path.join(root, filename)
+
+
+def image_path_city(root, name):
+    """Join ``name`` onto ``root`` without adding an extension.
+
+    Follows ``os.path.join`` semantics: when ``name`` is itself an absolute
+    path, ``root`` is discarded and ``name`` is returned unchanged.
+    """
+    relative = '{}'.format(name)
+    return os.path.join(root, relative)
+
+
+def image_basename(filename):
+    """Return the final path component of ``filename`` with its last extension removed."""
+    stem, _extension = os.path.splitext(filename)
+    return os.path.basename(stem)
+
+
+class VOC12(Dataset):
+    """Image/label pairs read from ``<root>/images`` and ``<root>/labels``.
+
+    The sample list is built from the label directory: every entry accepted
+    by ``is_image`` contributes its base name.  Item ``i`` loads
+    ``images/<name>.jpg`` converted to RGB and ``labels/<name>.png``
+    converted to mode ``'P'``, then applies the optional transforms.
+    """
+
+    def __init__(self, root, input_transform=None, target_transform=None):
+        self.images_root = os.path.join(root, 'images')
+        self.labels_root = os.path.join(root, 'labels')
+
+        # Sorted base names (directory and extension stripped) of the labels.
+        self.filenames = sorted(
+            image_basename(entry)
+            for entry in os.listdir(self.labels_root)
+            if is_image(entry)
+        )
+
+        self.input_transform = input_transform
+        self.target_transform = target_transform
+
+    def __getitem__(self, index):
+        """Return the ``(image, label)`` pair at ``index``."""
+        stem = self.filenames[index]
+        image_file = image_path(self.images_root, stem, '.jpg')
+        label_file = image_path(self.labels_root, stem, '.png')
+
+        with open(image_file, 'rb') as stream:
+            image = load_image(stream).convert('RGB')
+        with open(label_file, 'rb') as stream:
+            label = load_image(stream).convert('P')
+
+        image_transform = self.input_transform
+        if image_transform is not None:
+            image = image_transform(image)
+        label_transform = self.target_transform
+        if label_transform is not None:
+            label = label_transform(label)
+
+        return image, label
+
+    def __len__(self):
+        """Return the number of label files found at construction time."""
+        return len(self.filenames)
+
+
+class cityscapes(Dataset):
+    """Image/label pairs from ``<root>/leftImg8bit/<subset>`` and ``<root>/gtFine/<subset>``.
+
+    Both directory trees are walked recursively.  Images are files accepted
+    by ``is_image``; labels are files accepted by ``is_label_city``.  The two
+    sorted path lists are paired by position, so both trees must contain
+    matching files that sort in the same order.
+
+    Args:
+        root: directory containing ``leftImg8bit`` and ``gtFine``.
+        input_transform: optional callable applied to the RGB image.
+        target_transform: optional callable applied to the label image.
+        subset: sub-directory to load (default ``'train'``).
+    """
+
+    def __init__(self, root, input_transform=None, target_transform=None, subset='train'):
+        self.images_root = os.path.join(root, 'leftImg8bit/')
+        self.labels_root = os.path.join(root, 'gtFine/')
+
+        self.images_root += subset
+        self.labels_root += subset
+
+        print(self.images_root)
+        # Full paths (walk directory + file name) of every image, sorted.
+        self.filenames = [os.path.join(dp, f) for dp, dn, fn in os.walk(
+            os.path.expanduser(self.images_root)) for f in fn if is_image(f)]
+        self.filenames.sort()
+
+        # Full paths of every label file, sorted the same way.
+        self.filenamesGt = [os.path.join(dp, f) for dp, dn, fn in os.walk(
+            os.path.expanduser(self.labels_root)) for f in fn if is_label_city(f)]
+        self.filenamesGt.sort()
+
+        self.input_transform = input_transform
+        self.target_transform = target_transform
+
+    def __getitem__(self, index):
+        """Return ``(image, label, image_path, label_path)`` for ``index``."""
+        filename = self.filenames[index]
+        filenameGt = self.filenamesGt[index]
+
+        # The stored paths come from os.walk and already start with the
+        # dataset directory, so they are opened as-is.  Joining them onto
+        # images_root/labels_root again duplicates that prefix whenever
+        # ``root`` is a relative path and the file is then not found.
+        with open(filename, 'rb') as f:
+            image = load_image(f).convert('RGB')
+        with open(filenameGt, 'rb') as f:
+            label = load_image(f).convert('P')
+
+        if self.input_transform is not None:
+            image = self.input_transform(image)
+        if self.target_transform is not None:
+            label = self.target_transform(label)
+
+        return image, label, filename, filenameGt
+
+    def __len__(self):
+        """Return the number of image files found at construction time."""
+        return len(self.filenames)
+
+# added
+
+
+class IDD(Dataset):
+    """Image/label pairs from ``<root>/leftImg8bit/<subset>`` and ``<root>/gtFine/<subset>``.
+
+    Both directory trees are walked recursively.  Images are files accepted
+    by ``is_image``; labels are files accepted by ``is_label_IDD``.  The two
+    sorted path lists are paired by position, so both trees must contain
+    matching files that sort in the same order.
+
+    Args:
+        root: directory containing ``leftImg8bit`` and ``gtFine``.
+        input_transform: optional callable applied to the RGB image.
+        target_transform: optional callable applied to the label image.
+        subset: sub-directory to load (default ``'train'``).
+    """
+
+    def __init__(self, root, input_transform=None, target_transform=None, subset='train'):
+        self.images_root = os.path.join(root, 'leftImg8bit/')
+        self.labels_root = os.path.join(root, 'gtFine/')
+
+        self.images_root += subset
+        self.labels_root += subset
+
+        print(self.images_root)
+        # Full paths (walk directory + file name) of every image, sorted.
+        self.filenames = [os.path.join(dp, f) for dp, dn, fn in os.walk(
+            os.path.expanduser(self.images_root)) for f in fn if is_image(f)]
+        self.filenames.sort()
+
+        # Full paths of every label file, sorted the same way.
+        self.filenamesGt = [os.path.join(dp, f) for dp, dn, fn in os.walk(
+            os.path.expanduser(self.labels_root)) for f in fn if is_label_IDD(f)]
+        self.filenamesGt.sort()
+
+        self.input_transform = input_transform
+        self.target_transform = target_transform
+
+    def __getitem__(self, index):
+        """Return ``(image, label, image_path, label_path)`` for ``index``."""
+        filename = self.filenames[index]
+        filenameGt = self.filenamesGt[index]
+
+        # The stored paths come from os.walk and already start with the
+        # dataset directory, so they are opened as-is.  Joining them onto
+        # images_root/labels_root again duplicates that prefix whenever
+        # ``root`` is a relative path and the file is then not found.
+        with open(filename, 'rb') as f:
+            image = load_image(f).convert('RGB')
+        with open(filenameGt, 'rb') as f:
+            label = load_image(f).convert('P')
+
+        if self.input_transform is not None:
+            image = self.input_transform(image)
+        if self.target_transform is not None:
+            label = self.target_transform(label)
+
+        return image, label, filename, filenameGt
+
+    def __len__(self):
+        """Return the number of image files found at construction time."""
+        return len(self.filenames)
+
+
+class BDD(Dataset):
+    """Image/label pairs from ``<root>/images/<subset>`` and ``<root>/labels/<subset>``.
+
+    Only the top level of each directory is listed (no recursion).  Images
+    are entries accepted by ``is_image``; labels are entries accepted by
+    ``is_label_BDD``.  Both name lists are sorted and paired by position.
+    """
+
+    def __init__(self, root, input_transform=None, target_transform=None, subset='train'):
+        self.images_root = os.path.join(root, 'images/') + subset
+        self.labels_root = os.path.join(root, 'labels/') + subset
+
+        print(self.images_root)
+        # Bare file names; __getitem__ joins them back onto the directory.
+        self.filenames = sorted(
+            entry for entry in os.listdir(self.images_root) if is_image(entry))
+        self.filenamesGt = sorted(
+            entry for entry in os.listdir(self.labels_root) if is_label_BDD(entry))
+
+        self.input_transform = input_transform
+        self.target_transform = target_transform
+
+    def __getitem__(self, index):
+        """Return ``(image, label, image_name, label_name)`` for ``index``."""
+        filename = self.filenames[index]
+        filenameGt = self.filenamesGt[index]
+        image_file = image_path_city(self.images_root, filename)
+        label_file = image_path_city(self.labels_root, filenameGt)
+
+        with open(image_file, 'rb') as stream:
+            image = load_image(stream).convert('RGB')
+        with open(label_file, 'rb') as stream:
+            label = load_image(stream).convert('P')
+
+        image_transform = self.input_transform
+        if image_transform is not None:
+            image = image_transform(image)
+        label_transform = self.target_transform
+        if label_transform is not None:
+            label = label_transform(label)
+
+        return image, label, filename, filenameGt
+
+    def __len__(self):
+        """Return the number of image files found at construction time."""
+        return len(self.filenames)
diff --git a/train_RAPFT_step1.py b/train_RAPFT_step1.py
@@ -1,36 +1,10 @@
 '''
-# edited to train step3 - IDD.
-getting cuda out of memory. so not keeping entire model for training. loading only shared weights and adding IDD DS weights and training, CS & BDD DS weights are not being loaded. they will be loaded later.
--> init shared enc parts using "../save/Adaptations/RAP_FT/stepbdd/model_best_BDD_erfnet_RA_parallel_150_6RAP_FT_step2.pth.tar"
--> train as step 0 model on IDD with 27 classes. so the decoder and DS parts will get labeled as .0.
--> after training, transfer the IDD-DS parts to another model where .0. becomes .2.; transfer the BDD checkpoint .0. to .0. and .1. to .1. in the new model
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-This file does a finetuning + RAP style of training wherein,
+Training protocol for 1st dataset as per our algorithm: (This helps reparameterize the architecture in subsequent steps)
 -> init encoder with imagenet pretrained weights.
 -> add the DS layers in encoder
--> train the entire architecture on CS without freezing any layers.
--> for BDD, add its DS layers and finetune entire archi except the DS layers for CS.
--> repeat the same for IDD
+-> train the entire architecture on 1st step dataset without freezing any layers.
 
-The main_RAP.py file: RAP blocks with fixed, frozen encoder conv layers: here IL Ti is not dependent o IL Ti-1.
-But in this file, (RAP + FT) setting, each subsequent step is dependent on the previous steps. so init of IL Ti is from IL Ti-1.
-
-This Code file contains code for 2 types of models:
-1. RAPs
-
-2. BN - where encoder weights are common, fixed, imagenet pretrained encoder with DS BN layers; and decoder is DS. so train this setting also sequentially, adding new DSBN layers and decoder heads in each step
-
-nb_tasks:
-        task1: cityscapes
-        task2: BDD
-        task3: IDD
-this order doesn't change. its fixed. so pass dataloaders and task numbers respectively
 '''
-# Sept 2017
-# Eduardo Romera
-#######################
-# individually loads all 3 datasets and handles them separately
-
 import os
 import random
 import time

diff --git a/train_new_task_step2.py b/train_new_task_step2.py
@@ -1,44 +1,9 @@
 '''
-RAP-FT-dlr
-# for CS to BDD.
-# hard coded some things for step2 - BDD. Using differential lr for shared weights.
-In the RAPFT experiments, the shared weights are init from previous model. the RAP-current_task weights are randomly initialized. This is causing the shared weights to completely forget previous task and not learn properly.
-Training CS->BDD RAPFT model with: differential learning rate (dlr). Training {shared conv layers in the encoder} with a 10x lower learning rate than shared parameters, to help them learn.
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-This file does a finetuning + RAP style of training wherein,
-STEP 1:
--> init encoder with imagenet pretrained weights.
--> add the DS layers in encoder
--> train the entire architecture on CS without freezing any layers.
-STEP 2:
--> for BDD, add its DS layers.
-init:
-previous model into RAPCS + shared
-RAPCS into RAPBDD, Decoder_CS into Decoder_BDD, enc-BNCS into enc-BNBDD -----IMP---difference from main_RAP_FT_dlr.py---------------------
--> finetune entire archi except the DS layers for CS.
---------------------------------------------------------------------
-STEP 3: (later)
--> repeat the same for IDD
-
-The main_RAP.py file: RAP blocks with fixed, frozen encoder conv layers: here IL Ti is not dependent o IL Ti-1.
-But in this file, (RAP + FT) setting, each subsequent step is dependent on the previous steps. so init of IL Ti is from IL Ti-1.
-
-This Code file contains code for 2 types of models:
-1. RAPs
-
-2. BN - where encoder weights are common, fixed, imagenet pretrained encoder with DS BN layers; and decoder is DS. so train this setting also sequentially, adding new DSBN layers and decoder heads in each step
-
-nb_tasks:
-        task1: cityscapes
-        task2: BDD
-        task3: IDD
-this order doesn't change. its fixed. so pass dataloaders and task numbers respectively
+RAP_FT_KLD (proposed method) for step 2
+Example Dataset Setting: take model trained on CS, incrementally learn BDD. (CS->BDD)
+Trained using init scheme, differential learning rates and knowledge distillation as explained in Algorithm 1 of paper.
+compute KLD between {cs_curr, cs_old} - domain adaptive knowledge distillation between previous and current step CS model.
 '''
-# Sept 2017
-# Eduardo Romera
-#######################
-# individually loads all 3 datasets and handles them separately
-
 import os
 import random
 import time
@@ -201,10 +166,6 @@ def train(args, model, model_old):
     loader_val = DataLoader(dataset_val, num_workers=args.num_workers,
                             batch_size=args.batch_size, shuffle=False)
 
-    # dataset_val_cs = cityscapes(CS_datadir, co_transform_val, 'val')
-    # loader_val_cs = DataLoader(dataset_val_cs, num_workers=args.num_workers,
-    #                            batch_size=args.batch_size, shuffle=False)
-
     if args.dataset_old == 'cityscapes':
         print('loading CS as validation dataset, (old - step 1)')
         loader_val_old = DataLoader(dataset_cs_val, num_workers=args.num_workers,
@@ -226,9 +187,7 @@ def train(args, model, model_old):
     if args.cuda:
         weight = weight.cuda()
         weight_old = weight_old.cuda()
-        # weight_city = weight_city.cuda()
 
-    # criterion_city = CrossEntropyLoss2d(weight_city)
     criterion_old = CrossEntropyLoss2d(weight_old)
     criterion = CrossEntropyLoss2d(weight)
     print(type(criterion))
@@ -282,11 +241,6 @@ def train(args, model, model_old):
     kl_loss = torch.nn.KLDivLoss()
     kl_loss = kl_loss.cuda()
 
-    # print('\n\n\n')
-    # for name, m in model.named_parameters():
-    #     print(name, m.requires_grad)
-
-    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
     def lambda1(epoch): return pow((1-((epoch-1)/args.num_epochs)), 0.9)  # scheduler 2
     scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)  # scheduler 2
 
@@ -360,7 +314,7 @@ def lambda1(epoch): return pow((1-((epoch-1)/args.num_epochs)), 0.9)  # schedule
 
             if (doIouTrain):
                 iouEvalTrain.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
-                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)
+                # print ("Time to add confusion matrix: ", time.time() - start_time_iou)
 
             if args.steps_loss > 0 and step % args.steps_loss == 0:
                 average = sum(epoch_loss) / len(epoch_loss)

diff --git a/train_new_task_step3.py b/train_new_task_step3.py
@@ -1,21 +1,9 @@
 '''
 RAP_FT_KLD for step 3
-Dataset Setting: take model trained on CS|BDD, do CS|BDD->IDD
+Example Dataset Setting: take model trained on CS|BDD, do CS|BDD->IDD
 compute KLD between {cs_curr, cs_old} and {bdd_curr, bdd_old}. sum them up and use lambdac=0.1 on the sum.
 previous task model CS|BDD in memory and current model being trained on IDD in memory.
-
-we can compute KLD only between {bdd_curr, bdd_old} but in this file we try KLD between all old models.
-
-# 15th August.
-old dataset knowledge comes into play only during validation.
-init is independent of that. it will be correct as long as you give right checkpoint for init.
-
 '''
-# Sept 2017
-# Eduardo Romera
-#######################
-# individually loads all 3 datasets and handles them separately
-
 import os
 import random
 import time