# tools/dataloader.py

# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use

import pdb
from PIL import Image
import numpy as np

import torch
import torchvision.transforms as tvf

from tools.transforms import instanciate_transformation
from tools.transforms_tools import persp_apply


# Per-channel (R, G, B) mean and standard deviation fed to tvf.Normalize below.
# NOTE(review): these look like the usual ImageNet statistics -- confirm.
RGB_mean = [0.485, 0.456, 0.406]
RGB_std  = [0.229, 0.224, 0.225]

# Default image normalization: convert to a tensor, then normalize each channel
# with RGB_mean / RGB_std. Used as the default `norm` of PairLoader and read
# back by tensor2img() to undo the normalization.
norm_RGB = tvf.Compose([tvf.ToTensor(), tvf.Normalize(mean=RGB_mean, std=RGB_std)])


class PairLoader:
    """ On-the-fly jittering of pairs of image with dense pixel ground-truth correspondences.
    
    crop:   random crop applied to both images
    scale:  random scaling applied to img2
    distort: random distortion applied to img2
    norm:   transformation applied to both output images (default: norm_RGB)
    what:   names of the extra outputs to return, either as a space-separated
            string or as a sequence of names (also forwarded to dataset.get_pair)
    idx_as_rng_seed: if True, `random` and `np.random` are re-seeded with the
            sample index at every access, which removes the randomness
    
    `dataset` must expose `npairs` and `get_pair(i, what)`.
    
    self[idx] returns a dictionary with keys: img1, img2, plus the entries of `what`
     - img1: cropped original
     - img2: distorted cropped original
     - aflow: 'absolute' optical flow = (x,y) position of each pixel from img1 in img2,
              returned with shape (2,H,W), NaN where the mask is zero
     - mask: (binary image) valid pixels of img1
    """
    def __init__(self, dataset, crop='', scale='', distort='', norm = norm_RGB, 
                       what = 'aflow mask', idx_as_rng_seed = False):
        assert hasattr(dataset, 'npairs')
        assert hasattr(dataset, 'get_pair')
        self.dataset = dataset
        self.distort = instanciate_transformation(distort)
        self.crop = instanciate_transformation(crop)
        self.norm = instanciate_transformation(norm)
        self.scale = instanciate_transformation(scale)
        self.idx_as_rng_seed = idx_as_rng_seed # to remove randomness
        self.what = what.split() if isinstance(what, str) else what
        self.n_samples = 5 # number of random trials per image

    def __len__(self):
        """ Number of pairs; the dataset length must agree with dataset.npairs. """
        n_items = len(self.dataset)
        # and not nimg
        assert n_items == self.dataset.npairs, \
            'len(dataset)=%d differs from dataset.npairs=%d' % (n_items, self.dataset.npairs)
        return n_items

    def __repr__(self):
        """ Multi-line summary: dataset repr, number of pairs and the transformations. """
        def short_repr(s):
            # Flatten the multi-line repr on one line and drop its first 14 and last characters.
            # NOTE(review): the [14:-1] slice presumably strips a 'Compose(' wrapper -- confirm.
            return repr(s).strip().replace('\n',', ')[14:-1].replace('    ',' ')
        fmt_str = 'PairLoader\n'
        fmt_str += repr(self.dataset)
        fmt_str += '  npairs: %d\n' % self.dataset.npairs
        fmt_str += '  Distort: %s\n' % short_repr(self.distort)
        fmt_str += '  Crop: %s\n' % short_repr(self.crop)
        fmt_str += '  Norm: %s\n' % short_repr(self.norm)
        return fmt_str

    def __getitem__(self, i):
        """ Build the i-th training sample.
        
        Retrieves pair `i` from the dataset, applies scale/distort to the second
        image (and to the flow, correspondences and homography), then searches
        for a pair of matching windows and crops/resizes both images to the
        crop size.
        
        Returns a dict with 'img1' and 'img2' (both passed through self.norm)
        plus one entry per name in self.what that exists as a local variable
        of this method (e.g. aflow, mask, corres, homography, flow).
        When no suitable window is found, the images are black, aflow and
        homography are filled with NaN and the mask is all zeros.
        """
        if self.idx_as_rng_seed:
            import random
            random.seed(i)
            np.random.seed(i)

        # Retrieve an image pair and their absolute flow
        img_a, img_b, metadata = self.dataset.get_pair(i, self.what)
        
        # aflow contains pixel coordinates indicating where each 
        # pixel from the left image ended up in the right image
        # as (x,y) pairs, but its shape is (H,W,2)
        aflow = np.float32(metadata['aflow'])
        mask = metadata.get('mask', np.ones(aflow.shape[:2],np.uint8))

        # apply transformations to the second image
        # ('persp' starts as the 8 coefficients of the identity transformation)
        img_b = {'img': img_b, 'persp':(1,0,0,0,1,0,0,0)}
        if self.scale:
            img_b = self.scale(img_b)
        if self.distort:
            img_b = self.distort(img_b)
        
        # apply the same transformation to the flow
        aflow[:] = persp_apply(img_b['persp'], aflow.reshape(-1,2)).reshape(aflow.shape)
        corres = None
        if 'corres' in metadata:
            corres = np.float32(metadata['corres'])
            corres[:,1] = persp_apply(img_b['persp'], corres[:,1])
        
        # apply the same transformation to the homography
        homography = None
        if 'homography' in metadata:
            homography = np.float32(metadata['homography'])
            # p_b = homography * p_a
            persp = np.float32(img_b['persp']+(1,)).reshape(3,3)
            homography = persp @ homography

        # determine crop size
        img_b = img_b['img']
        crop_size = self.crop({'imsize':(10000,10000)})['imsize']
        # NOTE(review): min() on tuples is lexicographic, not element-wise -- confirm
        # this is the intended way to cap the output size by the image size.
        output_size_a = min(img_a.size, crop_size)
        output_size_b = min(img_b.size, crop_size)
        img_a = np.array(img_a)
        img_b = np.array(img_b)

        ah,aw,p1 = img_a.shape
        bh,bw,p2 = img_b.shape
        assert p1 == 3
        assert p2 == 3
        assert aflow.shape == (ah, aw, 2)
        assert mask.shape == (ah, aw)

        # Let's start by computing the local scale of the optical flow,
        # i.e. the square root of the absolute determinant of its Jacobian
        # (np.gradient returns the derivative along rows first, then columns).
        dx = np.gradient(aflow[:,:,0])
        dy = np.gradient(aflow[:,:,1])
        scale = np.sqrt(np.clip(np.abs(dx[1]*dy[0] - dx[0]*dy[1]), 1e-16, 1e16))

        # 16x16 occupancy grid used to measure how well the flow covers the second window
        accu2 = np.zeros((16,16), bool)
        # quantize coordinates x into 16 bins spanning the slice w
        Q = lambda x, w: np.int32(16 * (x - w.start) / (w.stop - w.start))
        
        def window1(x, size, w):
            # 1-D window of length ~size centered on x, shifted to fit in [0,w)
            l = x - int(0.5 + size / 2)
            r = l + int(0.5 + size)
            if l < 0: l,r = (0, r - l)
            if r > w: l,r = (l + w - r, w)
            if l < 0: l,r = 0,w # larger than width
            return slice(l,r)
        def window(cx, cy, win_size, scale, img_shape):
            # (row slice, column slice); win_size is (width, height)
            return (window1(cy, win_size[1]*scale, img_shape[0]), 
                    window1(cx, win_size[0]*scale, img_shape[1]))

        n_valid_pixel = mask.sum()
        sample_w = mask / (1e-16 + n_valid_pixel)
        def sample_valid_pixel():
            # draw a pixel with probability proportional to the mask
            n = np.random.choice(sample_w.size, p=sample_w.ravel())
            y, x = np.unravel_index(n, sample_w.shape)
            return x, y
        
        # Find suitable left and right windows
        trials = 0 # take the best out of few trials
        best = -np.inf, None
        for _ in range(50*self.n_samples):
            if trials >= self.n_samples: break # finished!

            # pick a random valid point from the first image
            if n_valid_pixel == 0: break
            c1x, c1y = sample_valid_pixel()
            
            # Find in which position the center of the left
            # window ended up being placed in the right image
            c2x, c2y = (aflow[c1y, c1x] + 0.5).astype(np.int32)
            if not(0 <= c2x < bw and 0 <= c2y < bh): continue

            # Get the flow scale
            sigma = scale[c1y, c1x]

            # Determine sampling windows
            if 0.2 < sigma < 1: 
                win1 = window(c1x, c1y, output_size_a, 1/sigma, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, 1, img_b.shape)
            elif 1 <= sigma < 5:
                win1 = window(c1x, c1y, output_size_a, 1, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, sigma, img_b.shape)
            else:
                continue # bad scale

            # compute a score based on the flow
            x2,y2 = aflow[win1].reshape(-1, 2).T.astype(np.int32)
            # Check the proportion of valid flow vectors
            valid = (win2[1].start <= x2) & (x2 < win2[1].stop) \
                  & (win2[0].start <= y2) & (y2 < win2[0].stop)
            score1 = (valid * mask[win1].ravel()).mean()
            # check the coverage of the second window
            accu2[:] = False
            accu2[Q(y2[valid],win2[0]), Q(x2[valid],win2[1])] = True
            score2 = accu2.mean()
            # Check how many hits we got
            score = min(score1, score2)

            trials += 1
            if score > best[0]:
                best = score, win1, win2
        
        if None in best: # couldn't find a good window
            img_a = np.zeros(output_size_a[::-1]+(3,), dtype=np.uint8)
            img_b = np.zeros(output_size_b[::-1]+(3,), dtype=np.uint8)
            aflow = np.nan * np.ones((2,)+output_size_a[::-1], dtype=np.float32)
            homography = np.nan * np.ones((3,3), dtype=np.float32)
            # keep the mask consistent with img1/aflow: same size, no valid pixel
            mask = np.zeros(output_size_a[::-1], dtype=mask.dtype)

        else:
            win1, win2 = best[1:]
            img_a = img_a[win1]
            img_b = img_b[win2]
            # express the flow in the coordinate system of the second window
            aflow = aflow[win1] - np.float32([[[win2[1].start, win2[0].start]]])
            mask = mask[win1]
            aflow[~mask.view(bool)] = np.nan # mask bad pixels!
            aflow = aflow.transpose(2,0,1) # --> (2,H,W)
            
            if corres is not None:
                corres[:,0] -= (win1[1].start, win1[0].start)
                corres[:,1] -= (win2[1].start, win2[0].start)
            
            if homography is not None:
                trans1 = np.eye(3, dtype=np.float32)
                trans1[:2,2] = (win1[1].start, win1[0].start)
                trans2 = np.eye(3, dtype=np.float32)
                trans2[:2,2] = (-win2[1].start, -win2[0].start)
                homography = trans2 @ homography @ trans1
                homography /= homography[2,2]
            
            # rescale if necessary
            if img_a.shape[:2][::-1] != output_size_a:
                sx, sy = (np.float32(output_size_a)-1)/(np.float32(img_a.shape[:2][::-1])-1)
                # Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias,
                # which is no longer available in recent Pillow releases.
                img_a = np.asarray(Image.fromarray(img_a).resize(output_size_a, Image.LANCZOS))
                mask = np.asarray(Image.fromarray(mask).resize(output_size_a, Image.NEAREST))
                # nearest-neighbour resizing so that flow values are never interpolated
                afx = Image.fromarray(aflow[0]).resize(output_size_a, Image.NEAREST)
                afy = Image.fromarray(aflow[1]).resize(output_size_a, Image.NEAREST)
                aflow = np.stack((np.float32(afx), np.float32(afy)))
                
                if corres is not None:
                    corres[:,0] *= (sx, sy)
                
                if homography is not None:
                    homography = homography @ np.diag(np.float32([1/sx,1/sy,1]))
                    homography /= homography[2,2]

            if img_b.shape[:2][::-1] != output_size_b:
                sx, sy = (np.float32(output_size_b)-1)/(np.float32(img_b.shape[:2][::-1])-1)
                img_b = np.asarray(Image.fromarray(img_b).resize(output_size_b, Image.LANCZOS))
                # the flow targets live in img_b, so they scale with it
                aflow *= [[[sx]], [[sy]]]
                
                if corres is not None:
                    corres[:,1] *= (sx, sy)
                
                if homography is not None:
                    homography = np.diag(np.float32([sx,sy,1])) @ homography
                    homography /= homography[2,2]
    
        assert aflow.dtype == np.float32, 'aflow must be float32, got %s' % aflow.dtype
        assert homography is None or homography.dtype == np.float32, \
            'homography must be float32, got %s' % homography.dtype
        if 'flow' in self.what:
            # relative flow = absolute flow minus the (x,y) pixel grid
            H, W = img_a.shape[:2]
            mgrid = np.mgrid[0:H, 0:W][::-1].astype(np.float32)
            flow = aflow - mgrid
        
        result = dict(img1=self.norm(img_a), img2=self.norm(img_b))
        # NOTE(security): eval() resolves each requested name against the local
        # variables of this method, and would execute any expression found in
        # `what` -- only pass trusted values. Names that do not exist are skipped.
        for what in self.what:
            try: result[what] = eval(what)
            except NameError: pass
        return result


def threaded_loader( loader, iscuda, threads, batch_size=1, shuffle=True):
    """ Wrap an indexable dataset into a pytorch DataLoader.
    
    Parameters
    ----------
    loader : object[i] returns the i-th training example.
    
    iscuda : bool
        forwarded as `pin_memory`.
    
    threads : int
        forwarded as `num_workers`.
    
    batch_size : int
    
    shuffle : bool
    
    Returns
    -------
        a torch.utils.data.DataLoader that batches examples with `collate`.
    """
    settings = dict(
        batch_size = batch_size,
        shuffle = shuffle,
        sampler = None,
        num_workers = threads,
        pin_memory = iscuda,
        collate_fn = collate,
    )
    return torch.utils.data.DataLoader(loader, **settings)


def collate(batch, _use_shared_memory=True):
    """Puts each data field into a tensor with outer dimension batch size.
    Copied from https://github.com/pytorch in torch/utils/data/_utils/collate.py
    
    The type of the first element decides how the batch is merged:
     - torch tensors are stacked (into shared memory if `_use_shared_memory`)
     - numpy arrays are converted and stacked; if they cannot be stacked
       (RuntimeError, e.g. different shapes) the list of tensors is returned
     - numpy scalars become a 1-D tensor
     - ints -> LongTensor, floats -> DoubleTensor
     - strings are returned as is, None entries as a list
     - dicts and tuples/lists are collated recursively, field by field
    
    Raises TypeError for any other element type.
    """
    import re
    error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if _use_shared_memory:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum(x.numel() for x in batch)
            storage = elem.storage()._new_shared(numel)
            # give `out` its final shape up-front: relying on torch.stack to
            # resize a non-empty `out` tensor is deprecated
            out = elem.new(storage).resize_(len(batch), *list(elem.size()))
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        is_array = elem_type.__name__ == 'ndarray'
        # besides arrays, only numpy scalars (empty shape) can be batched
        if not is_array and getattr(elem, 'shape', None) != ():
            raise TypeError(error_msg.format(elem_type))
        # array of string classes and object
        if re.search('[SaUO]', elem.dtype.str) is not None:
            raise TypeError(error_msg.format(elem.dtype))
        if not is_array:
            return torch.as_tensor(np.asarray(batch))
        batch = [torch.from_numpy(b) for b in batch]
        try:
            return torch.stack(batch, 0)
        except RuntimeError:
            # arrays that cannot be stacked are returned as a list
            return batch
    elif elem is None:
        return list(batch)
    elif isinstance(elem, int):
        return torch.LongTensor(batch)
    elif isinstance(elem, float):
        return torch.DoubleTensor(batch)
    elif isinstance(elem, str):
        return batch
    elif isinstance(elem, dict):
        # forward the flag so that nested tensors honour the caller's choice
        return {key: collate([d[key] for d in batch], _use_shared_memory) for key in elem}
    elif isinstance(elem, (tuple,list)):
        transposed = zip(*batch)
        return [collate(samples, _use_shared_memory) for samples in transposed]

    raise TypeError((error_msg.format(elem_type)))


def tensor2img(tensor, model=None):
    """ Turn a normalized (3,H,W) torch/numpy tensor back into a PIL Image,
        i.e. revert the Normalize() and ToTensor() steps of norm_RGB.
        `model` is accepted but not used.
    """
    normalize = norm_RGB.transforms[1]
    mean = normalize.mean
    std = normalize.std

    if isinstance(tensor, torch.Tensor):
        tensor = tensor.detach().cpu().numpy()

    # channels last, then undo the per-channel normalization
    pixels = tensor.transpose(1,2,0)
    unnormalized = (pixels * std) + mean
    res = np.uint8(np.clip(255*unnormalized, 0, 255))

    from PIL import Image
    return Image.fromarray(res)


# Debugging entry point: builds a data loader from a command-line expression
# and displays each sample (both images and the flow between them).
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser("Tool to debug/visualize the data loader")
    parser.add_argument("dataloader", type=str, help="command to create the data loader")
    args = parser.parse_args()

    # The star-import and `auto_pairs` below are made available on purpose:
    # the expression given on the command line may refer to them.
    from datasets import *
    auto_pairs = lambda db: SyntheticPairDataset(db,
        'RandomScale(256,1024,can_upscale=True)', 
        'RandomTilting(0.5), PixelNoise(25)')
        
    # NOTE(security): eval() executes the command-line argument as Python code;
    # only use this tool with trusted input.
    loader = eval(args.dataloader)
    print("Data loader =", loader)

    from tools.viz import show_flow
    for data in loader:
        aflow = data['aflow']
        H, W = aflow.shape[-2:]
        # relative flow = absolute flow minus the (x,y) pixel grid, moved to (H,W,2)
        flow = (aflow - np.mgrid[:H, :W][::-1]).transpose(1,2,0)
        show_flow(tensor2img(data['img1']), tensor2img(data['img2']), flow)