-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtrain.py
96 lines (72 loc) · 3.39 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from mnk import Board
import random
import matplotlib.pyplot as plt
from agent import Agent
from model import Model
from plot import plot_wins, save_plots
from hof import HOF
from utils import run_game, arg_parser
from save_model import save_model
import sys
# Set cmd-line training arguments.
# NOTE: this runs at import time, so importing this module parses sys.argv.
# `verbose` is forwarded to the diagnostic run_game call in train(), `model_name`
# names the saved model/plots in main(); `mcts` is not used in the code visible here.
verbose, mcts, model_name = arg_parser(sys.argv)
# Game configuration shared by the hall of fame and the diagnostic games.
# Presumably (m, n, k) = board width, board height, run length needed to win
# (i.e. tic-tac-toe) -- TODO confirm against mnk.Board.
mnk = (3, 3, 3)
def main():
    """Run the full training session, save the results, then browse recorded games.

    Builds the hall of fame, calls ``train`` with the hyperparameters below,
    hands the returned model to ``save_model`` and the winner histories to
    ``save_plots``, and finally starts an interactive prompt for printing the
    diagnostic games collected during training.
    """
    # Hyperparameter List
    num_batches = 20_000  # Total training games = num_batches * games_per_batch
    games_per_batch = 5
    epsilon = 0.2  # Epsilon is the exploration factor: probability with which a random move is chosen to play

    hof = HOF(mnk, folder="menagerie")

    print("\nTraining model: {}\n".format(model_name))
    model, winnersXO, winnersHOF, games = train(hof, num_batches, games_per_batch, epsilon, Model())

    save_model(model, model_name)
    save_plots(hof, model_name, winnersXO, winnersHOF)

    # Can be used after looking at plot to analyze important milestones
    _query_games(games)


def _query_games(games):
    """Interactively print the moves of recorded games until the user enters -1.

    Args:
        games: Sequence of recorded games; each entry is iterated and every
            item in it is printed on its own line.
    """
    while True:
        try:
            ind = int(input("Query a game: "))
        except ValueError:
            # A typo should not kill the session after a long training run
            print("Please enter an integer game index (-1 to quit).")
            continue
        except EOFError:
            # Input stream is closed (e.g. non-interactive run): nothing left to query
            break

        # -1 is the quit sentinel, so it must be checked before indexing;
        # otherwise quitting would first print games[-1] (the last game).
        if ind == -1:
            break

        try:
            selected_game = games[ind]
        except IndexError:
            print("No game with index {} ({} games recorded).".format(ind, len(games)))
            continue

        for move in selected_game:
            print(move)
def train(hof, num_batches, games_per_batch, epsilon, model):
    """Play batches of training games against hall-of-fame opponents.

    Each batch plays ``games_per_batch`` games with ``training=True`` against
    opponents sampled from ``hof``, passes the model to ``hof.gate``, and then
    plays one non-training game with exploration set to 0 whose outcome is
    recorded. Ctrl+C stops training early; everything collected up to that
    point is still returned.

    Args:
        hof: Hall of fame; must provide ``store``, ``sample`` and ``gate``.
        num_batches: Number of batches to run.
        games_per_batch: Training games per batch. Must be at least 1 whenever
            ``num_batches`` is positive, because the diagnostic game takes the
            side opposite to the one used in the batch's last training game.
        epsilon: Exploration factor forwarded to ``run_game`` for training games.
        model: Model that plays every game; it is also stored in ``hof``
            before the first batch.

    Returns:
        Tuple ``(model, winnersXO, winnersHOF, games)``. The three lists hold
        one entry per recorded diagnostic game: the winner as returned by
        ``run_game``, that winner multiplied by the side ``model`` played,
        and the game data returned by ``run_game``.

    Raises:
        ValueError: If batches are requested with fewer than one game each.
    """
    # Without at least one training game there is no "last side played" to flip
    # for the diagnostic game; fail with a clear message instead of an
    # UnboundLocalError deep inside the loop.
    if num_batches > 0 and games_per_batch < 1:
        raise ValueError("games_per_batch must be at least 1 when num_batches > 0")

    winnersXO = []
    winnersHOF = []
    games = []

    # Initialize hall of fame
    hof.store(model)

    try:
        for batch_number in range(num_batches):
            first_game = batch_number * games_per_batch + 1
            last_game = (batch_number + 1) * games_per_batch
            print(f"Batch: {batch_number} (Games {first_game}-{last_game})")

            # Runs a batch of games, after which we can play/save a diagnostic game
            # to see if it improved and store current model to hof
            for _ in range(games_per_batch):
                # Randomly assign sides (X or O) for game to be played
                side_best = 1 if random.random() > 0.5 else -1
                side_hof = -side_best

                model_hof = hof.sample("uniform")

                # Initialize the agents
                agent_best = Agent(model, side_best)
                agent_hof = Agent(model_hof, side_hof)

                # Play game and train on its outcome
                run_game(agent_best, agent_hof, epsilon, training=True)

            # Gate will determine if model is worthy, and store in hof only if it is
            # (Currently, it just stores every game)
            hof.gate(model)

            # Switch sides and resample hof so diagnostic is not biased towards last game played
            side_best = -side_best
            side_hof = -side_best
            model_hof = hof.sample("uniform")
            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, side_hof)

            # Run a diagnostic (non-training, no exploration) game to collect data
            diagnostic_winner, game_data = run_game(
                agent_best, agent_hof, 0, training=False, mnk=mnk, verbose=verbose
            )

            # Store data from diagnostic game for this batch
            games.append(game_data)
            winnersXO.append(diagnostic_winner)  # X or O
            winnersHOF.append(diagnostic_winner * side_best)  # Best or HOF
    except KeyboardInterrupt:
        print("\n=======================")
        print("Training interrupted.")
        print("=======================")
    else:
        # Only claim completion when every batch actually ran
        print("Training completed.")

    return model, winnersXO, winnersHOF, games
# Run the training session only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()