# Dataset parameters
dataset_params:
  # Path to the data. Data can be stored in several formats: .mp4 or .gif videos, stacked .png images, or folders with frames.
  root_dir: video-preprocessing/croppednew23dataset
  # Image shape, needed for the stacked .png format.
  image_shape: [256, 256, 3]
  # In the case of VoxCeleb or TaiChi a single video can be split into many chunks, or there may be several videos for a single person.
  # In this case an epoch can be a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False).
  # If the name of the video is '12335#adsbf.mp4', the id is assumed to be 12335.
  id_sampling: False
  # Augmentation parameters, see augmentation.py for all possible augmentations
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1

# Defines model architecture
model_params:
  common_params:
    # Number of segments
    num_segments: 10
    # Number of channels per image
    num_channels: 3
    # Whether to estimate the affine part of the transformation; if False, only the shift part is used
    estimate_affine_part: True
  segmentation_module_params:
    # Softmax temperature for shift heatmaps
    temperature: 0.1
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 1024
    # Number of blocks in the U-Net. Can be increased or decreased depending on resolution.
    num_blocks: 5
    # Segmentation is predicted on smaller images for better performance;
    # scale_factor=0.25 means that a 256x256 image will be resized to 64x64.
    scale_factor: 0.25
  reconstruction_module_params:
    # Number of features multiplier
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 512
    # Number of downsampling blocks in the Johnson architecture.
    # Can be increased or decreased depending on resolution.
    num_down_blocks: 2
    # Number of ResBlocks in the Johnson architecture.
    num_bottleneck_blocks: 6
    # Whether to estimate a visibility map
    estimate_visibility: True

# Parameters of training
train_params:
  # Number of dataloader workers
  num_workers: 5
  # Number of training epochs
  num_epochs: 10
  # For better I/O performance when the number of videos is small, the number of epochs can be multiplied by this number.
  # Thus, effectively, with num_repeats=100 each epoch is 100 times larger.
  num_repeats: 2
  # Learning rates
  lr_segmentation_module: 2.0e-4
  lr_reconstruction_module: 2.0e-4
  batch_size: 4
  # Scales for the perceptual pyramid loss. If scales = [1, 0.5, 0.25, 0.125] and the image resolution is 256x256,
  # then the loss will be computed on resolutions 256x256, 128x128, 64x64, 32x32.
  scales: [1, 0.5, 0.25, 0.125]
  # Save a checkpoint this frequently. If checkpoint_freq=50, a checkpoint is saved every 50 epochs.
  checkpoint_freq: 5
  # Parameters of the transform used for the equivariance loss
  transform_params:
    # Sigma for the affine part
    sigma_affine: 0.05
    # Sigma for the deformation part
    sigma_tps: 0.005
    # Number of points in the deformation grid
    points_tps: 5
  loss_weights:
    equivariance: 10
    perceptual: [10, 10, 10, 10, 10]

# Visualization parameters
visualizer_params:
  # Draw keypoints (shifts in affine transformations) of this size; increase or decrease depending on resolution
  kp_size: 5
  # Draw a white border around images
  draw_border: True
  # Color map for keypoints
  colormap: 'gist_rainbow'
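
# ---------------------------------------------------------------------------
# Loading sketch (not part of the original config): training scripts in this
# style of repository typically parse the file with PyYAML. The snippet below
# is a minimal, hedged illustration of how the parsed structure can be
# accessed; 'config.yaml' is a hypothetical filename for this file, and the
# variable names are illustrative only.
#
#   import yaml
#
#   with open('config.yaml') as f:  # hypothetical path to this config file
#       config = yaml.safe_load(f)
#
#   # Nested YAML sections become plain dicts and lists:
#   num_segments = config['model_params']['common_params']['num_segments']  # 10
#   scales = config['train_params']['scales']  # [1, 0.5, 0.25, 0.125]
# ---------------------------------------------------------------------------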