# config.yaml

# Environment settings.
# NOTE(review): the key names match the arguments of Gym's AtariPreprocessing
# wrapper -- confirm how the loader actually consumes them.
env:
  screen_size: 84
  frame_skip: 4
  noop_max: 30
  # Lowercase is the canonical boolean spelling accepted by every YAML schema.
  terminal_on_life_loss: false
# The original paper used 1000000.
replay_memory_size: 350000
# The original paper used 4; reduced to 3 here to save memory.
agent_history_length: 3
# Training hyperparameters.
train:
  # 10M frames: the experiment setting of Extended Data Table 3.
  total_frame: 10000000
  replay_start_size: 50000
  # Number of actions selected by the agent between successive SGD updates.
  update_frequency: 4
  # Same as the original paper (32).
  minibatch_size: 32
  discount_factor: 0.99
  learning_rate: 0.00025
  gradient_momentum: 0.95
  # TODO: unclear how to set this in PyTorch.
  # NOTE(review): presumably maps to `alpha` of torch.optim.RMSprop -- confirm.
  squared_gradient_momentum: 0.95
  # TODO: unclear how to set this in PyTorch either.
  # NOTE(review): presumably maps to `eps` of torch.optim.RMSprop -- confirm.
  min_squared_gradient: 0.01
  target_network_update_frequency: 10000
# Exploration settings.
# NOTE(review): presumably an epsilon-greedy schedule annealed from
# initial_exploration to final_exploration over final_exploration_frame
# frames -- confirm against the training loop.
exploration:
  initial_exploration: 1
  final_exploration: 0.1
  final_exploration_frame: 1000000
# Settings for the held-out state set used for debugging.
debug:
  # Number of training steps to run before the held-out set is collected.
  heldout_step: 100000
  heldout_minibatch_size: 512
  # 2^15
  heldout_state_size: 32768