# config.yml
---

# MCTS config
# Settings for the Monte Carlo tree search (MCTS) training loop.
episodes: 10000  # Single process
episodes_per_epoch: 2  # Multi process
simulations: 500  # m
c: 1.4  # Exploration constant
non_det_moves: 6  # Number of plies to use for non-deterministic playouts
# NOTE(review): presumably the total number of training epochs — confirm
# against the consumer of this file.
epochs: 1000
# NOTE(review): presumably the epoch to resume from (0 = start fresh) —
# confirm against the consumer of this file.
epoch_skip: 0

# Neural Network config
# model_type options: resnet, convnet, conceptnet
model_type: resnet
# reward_function options: zero_sum, concept_fn or jem
reward_function: zero_sum
resnet_filters: 128  # f
convnet_filters: 64  # f
save_intervals: [1, 2, 3, 5, 7, 10, 13, 15, 20, 30, 40, 60, 80,
                 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
# NOTE(review): learning_rate / end_learning_rate / decay_steps look like the
# start value, end value and step count of a decay schedule — confirm.
learning_rate: 0.001
end_learning_rate: 0.0001
decay_steps: 10000
pre_trained: false
# The path below is specific to board size 7 (see the directory name).
pre_trained_path: ../models/pre_trained/board_size_7/
clear_tensorboard: true

# Replay buffer config
rbuf_size: 2048  # Single process (2048)
rbuf_cap: 30000  # Multi process
# 40% chance of adding a case to the replay buffer; helps reduce
# overfitting.
sample_ratio: 0.4
batch_size: 128  # 512

# Go config
board_size: 7  # k
# Komi gives white a bonus of x points at the end of the game because black
# goes first (2.5 for 5x5 and 3.5 for 7x7).
komi: 3.5
# NOTE(review): presumably the maximum number of moves per game — confirm.
move_cap: 120

# Debug config
# Both switches are disabled here.
# NOTE(review): presumably `render` toggles board rendering and `plot`
# toggles plotting of training output — confirm against the consumer.
render: false
plot: false

# Topp config
# NOTE(review): `g` appears to be the symbol for the number of games played
# per pairing in the tournament — confirm.
nr_of_topp_games: 10  # g