# Dataset parameters
dataset_params:
  # Path to the data. Data can be stored in several formats: .mp4 or .gif videos, stacked .png images, or folders with frames.
  root_dir: video-preprocessing/croppednew23dataset
  # Image shape, needed for the stacked .png format.
  image_shape: [256, 256, 3]
  # In the case of VoxCeleb or TaiChi a single video can be split into many chunks, or there may be several videos for a single person.
  # In this case an epoch can be a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False).
  # If the name of the video is '12335#adsbf.mp4', the id is assumed to be 12335.
  id_sampling: False
  # Augmentation parameters, see augmentation.py for all possible augmentations
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1

# Defines model architecture
model_params:
  common_params:
    # Number of segments
    num_segments: 10
    # Number of channels per image
    num_channels: 3
    # Whether to estimate the affine part of the transformation; if False, only the shift part is used
    estimate_affine_part: True
  segmentation_module_params:
    # Softmax temperature for shift heatmaps
    temperature: 0.1
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 1024
    # Number of blocks in the U-Net. Can be increased or decreased depending on resolution.
    num_blocks: 5
    # Segmentation is predicted on smaller images for better performance;
    # scale_factor=0.25 means that a 256x256 image will be resized to 64x64.
    scale_factor: 0.25
  reconstruction_module_params:
    # Number of features multiplier
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 512
    # Number of downsampling blocks in the Johnson architecture.
    # Can be increased or decreased depending on resolution.
    num_down_blocks: 2
    # Number of ResBlocks in the Johnson architecture.
    num_bottleneck_blocks: 6
    # Whether to estimate a visibility map
    estimate_visibility: True

# Parameters of training
train_params:
  # Number of dataloader workers
  num_workers: 5
  # Number of training epochs
  num_epochs: 10
  # For better I/O performance when the number of videos is small, the number of epochs can be multiplied by this number.
  # Thus, effectively, with num_repeats=100 each epoch is 100 times larger.
  num_repeats: 2
  # Learning rates
  lr_segmentation_module: 2.0e-4
  lr_reconstruction_module: 2.0e-4
  batch_size: 4
  # Scales for the perceptual pyramid loss. If scales = [1, 0.5, 0.25, 0.125] and the image resolution is 256x256,
  # then the loss will be computed on resolutions 256x256, 128x128, 64x64, 32x32.
  scales: [1, 0.5, 0.25, 0.125]
  # Save a checkpoint this frequently. If checkpoint_freq=50, a checkpoint is saved every 50 epochs.
  checkpoint_freq: 5
  # Parameters of the transform used for the equivariance loss
  transform_params:
    # Sigma for the affine part
    sigma_affine: 0.05
    # Sigma for the deformation part
    sigma_tps: 0.005
    # Number of points in the deformation grid
    points_tps: 5
  loss_weights:
    equivariance: 10
    perceptual: [10, 10, 10, 10, 10]

# Visualization parameters
visualizer_params:
  # Draw keypoints (shifts in affine transformations) of this size; increase or decrease depending on resolution
  kp_size: 5
  # Draw a white border around images
  draw_border: True
  # Color map for keypoints
  colormap: 'gist_rainbow'
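
# ---------------------------------------------------------------------------
# Loading sketch (not part of the original config): training scripts in this
# style of repository typically parse the file with PyYAML. The snippet below
# is a minimal, hedged illustration of how the parsed structure can be
# accessed; 'config.yaml' is a hypothetical filename for this file, and the
# variable names are illustrative only.
#
#   import yaml
#
#   with open('config.yaml') as f:  # hypothetical path to this config file
#       config = yaml.safe_load(f)
#
#   # Nested YAML sections become plain dicts and lists:
#   num_segments = config['model_params']['common_params']['num_segments']  # 10
#   scales = config['train_params']['scales']  # [1, 0.5, 0.25, 0.125]
# ---------------------------------------------------------------------------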