
Commit

Fixed some bugs
giannipint committed Aug 13, 2021
1 parent 7d1bc2c commit 2ff77d1
Showing 3 changed files with 18 additions and 14 deletions.
10 changes: 6 additions & 4 deletions README.md
@@ -4,7 +4,6 @@ Pytorch implementation of the CVPR 2021 oral paper: SliceNet: deep dense depth e
Paper at:
https://openaccess.thecvf.com/content/CVPR2021/papers/Pintore_SliceNet_Deep_Dense_Depth_Estimation_From_a_Single_Indoor_Panorama_CVPR_2021_paper.pdf

![](assets/intro.png)

We present SliceNet, a novel deep neural network that estimates a depth map from a single monocular indoor panorama.
The network works directly on the equirectangular projection, exploiting the properties of indoor 360° images.
@@ -18,7 +17,9 @@ This repo is a **python** implementation where you can test **depth inference**
![](assets/overview.png)

## Updates
* 2020-07-21: Network source code and demo released
* 2021-08-13: IMPORTANT: Fixed bug in weights init: model and pre-trained weights updated
- REPLACE PREVIOUS MODEL AND WEIGHTS
* 2021-07-21: Network source code and demo released

## Requirements
- Python >=3.6
@@ -36,12 +37,13 @@ Copy to your local ./ckpt directory.
- As in previous comparable works, we resize the equirectangular images and depth maps to 512 × 1024.
- Stitching of the original 18-pose format into equirectangular images was performed following the official procedure from https://github.com/niessner/Matterport/blob/master/data_organization.md, based on the methods/tools of Zhang: https://github.com/yindaz/PanoBasic.
- [resnet50_stanford.pth](https://vicserver.crs4.it/slicenet/resnet50_stanford.pth)
- [resnet50_stanford.pth]
- Trained with ResNet50 using the Stanford-2D-3D-S dataset: http://buildingparser.stanford.edu/dataset.html.
- As in previous comparable works, we adopt the official Fold #1 split, taking the fifth area (area 5) for testing and the remaining areas for training.
- As in previous comparable works, we resize the equirectangular images and depth maps to 512 × 1024.
- Invalid measurements are masked as 0.
- [resnet50_s3d.pth](https://vicserver.crs4.it/slicenet/resnet50_s3d.pth)
- COMING SOON
- [resnet50_s3d.pth]
- Trained with ResNet50 using the Structured3D dataset and its official split.
- [resnet50_360D.pth]
- COMING SOON
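
For reference, here is a minimal, hedged sketch of how one of these checkpoints might be loaded for depth inference. The `SliceNet` class name, checkpoint layout, input preprocessing, and forward signature below are assumptions for illustration, not code taken verbatim from this repo:

```python
import numpy as np
import torch
from PIL import Image

from slice_model import SliceNet   # class name assumed from slice_model.py

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hypothetical setup: ResNet50 backbone at the 512 x 1024 working resolution.
net = SliceNet('resnet50', full_size=False).to(device)
state = torch.load('./ckpt/resnet50_stanford.pth', map_location=device)
net.load_state_dict(state.get('state_dict', state))   # checkpoint layout assumed
net.eval()

# Equirectangular RGB panorama resized to 512 x 1024, as in the evaluation setup above.
img = Image.open('pano.png').convert('RGB').resize((1024, 512))
x = torch.from_numpy(np.asarray(img, dtype=np.float32) / 255.0)
x = x.permute(2, 0, 1).unsqueeze(0).to(device)         # (1, 3, 512, 1024)

with torch.no_grad():
    depth = net(x)   # predicted depth map; exact output format depends on the model
```

Adjust the backbone, checkpoint path, and any normalization to match the repo's actual demo/inference script.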
Binary file removed assets/intro.png
22 changes: 12 additions & 10 deletions slice_model.py
@@ -2,10 +2,9 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as models

import functools
import time

ENCODER_RESNET = [
    'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
@@ -16,7 +15,6 @@
def lr_pad(x, padding=1):
    ''' Pad left/right-most to each other instead of zero padding '''
    return torch.cat([x[..., -padding:], x, x[..., :padding]], dim=3)
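## Toy illustration (not part of the model): lr_pad wraps the panorama's left and right
## borders onto each other instead of zero-padding, so the 360-degree seam stays continuous.
_demo = torch.arange(8.0).view(1, 1, 2, 4)   ## (B, C, H, W), first row is [0, 1, 2, 3]
print(lr_pad(_demo)[0, 0, 0])                ## tensor([3., 0., 1., 2., 3., 0.])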

class LR_PAD(nn.Module):
    ''' Pad left/right-most to each other instead of zero padding '''
    def __init__(self, padding=1):
@@ -25,7 +23,6 @@ def __init__(self, padding=1):

    def forward(self, x):
        return lr_pad(x, self.padding)

def wrap_lr_pad(net):
    for name, m in net.named_modules():
        if not isinstance(m, nn.Conv2d):
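## The body of wrap_lr_pad is truncated by this diff. As a hedged illustration only
## (the HorizonNet-style pattern this file follows, not necessarily this repo's exact
## code): disable each Conv2d's horizontal zero padding and prepend LR_PAD so the
## left/right borders wrap around instead.
def wrap_lr_pad_sketch(net):
    for name, m in net.named_modules():
        if not isinstance(m, nn.Conv2d):
            continue
        if m.padding[1] == 0:
            continue
        w_pad = int(m.padding[1])
        m.padding = (m.padding[0], 0)                        ## keep only vertical zero padding
        names = name.split('.')
        parent = functools.reduce(getattr, names[:-1], net)  ## module owning this conv
        setattr(parent, names[-1], nn.Sequential(LR_PAD(w_pad), m))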
@@ -108,8 +105,11 @@ def forward(self, x, out_w):

        ##### HorizonNet-style upsampling
        x = torch.cat([x[..., -1:], x, x[..., :1]], 3)  ## plus 2 on W
        x = F.interpolate(x, size=(x.shape[2], out_w + 2 * factor), mode='bilinear', align_corners=False)
        x = F.interpolate(x, size=(x.shape[2], out_w + 2 * factor), mode='bilinear', align_corners=False)  #### NB interpolating only W
        x = x[..., factor:-factor]  ## minus 2 on W

        ## SIMPLEST
        ## x = F.interpolate(x, size=(x.shape[2], out_w), mode='bilinear', align_corners=False)

        return x
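## Shape sketch for the wrap-and-crop above (illustrative numbers, not from the repo):
## with a (1, 64, 16, 256) feature map, out_w = 1024 and factor = 4,
##   after the wrap-around cat      : (1, 64, 16, 258)
##   after the width interpolation  : (1, 64, 16, 1032)   ## H stays 16, only W grows
##   after cropping factor per side : (1, 64, 16, 1024)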

@@ -150,10 +150,10 @@ def __init__(self, backbone, full_size = False):

        self.full_size = full_size

        self.out_w_size = 512
        ##self.out_w_size = 512

        if(self.full_size):
            self.out_w_size = 1024
        ##if(self.full_size):
        ##self.out_w_size = 1024

        self.feature_extractor = Resnet(backbone, pretrained=True)

@@ -165,7 +165,9 @@ def __init__(self, backbone, full_size = False):

        if(self.full_size):
            c_last *= 2


        ##print('c_last',c_last)

        self.slicing_module = MultiSlicing(c1, c2, c3, c4, self.ch_scale)

        self.bi_rnn = nn.LSTM(input_size=c_last,
@@ -204,7 +206,7 @@ def __init__(self, backbone, full_size = False):
        ''' Pad left/right-most to each other instead of zero padding '''
        wrap_lr_pad(self)

        self.apply(xavier_init)
        ##self.apply(xavier_init)

    def _prepare_x(self, x):
        if self.x_mean.device != x.device:
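## Note on the fix above (hedged sketch, not part of this repo): the blanket
## self.apply(xavier_init) that this commit comments out would also re-initialize the
## ImageNet-pretrained weights loaded by Resnet(backbone, pretrained=True). If the
## newly added layers still need explicit initialization, one assumed-safe variant is
## to skip the pretrained feature extractor; the prefix below is illustrative.
def xavier_init_skip_pretrained(model, pretrained_prefix='feature_extractor'):
    for name, m in model.named_modules():
        if name.startswith(pretrained_prefix):
            continue   ## leave the pretrained backbone untouched
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)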

