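"""train.py

Trains a model to play an (m, n, k) game (here 3x3x3, i.e. tic-tac-toe) by
self-play against a hall of fame of previously stored models, plots the
results, and saves the trained model.
"""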
from mnk import Board
import random
import matplotlib.pyplot as plt
from agent import Agent
from model import Model
from plot import plot_wins
from hof import HOF
from utils import run_game
from save_model import save_model, name
import sys

mnk = (3, 3, 3)  # m, n, k: board width, board height, and pieces in a row needed to win


def main():
    # Hyperparameter list
    num_batches = 20_000        # Total training games = num_batches * games_per_batch
    games_per_batch = 5
    epsilon = 0.2               # Exploration factor: probability with which a random move is played

    # Declare hall of fame
    hof = HOF(mnk, folder="menagerie")

    print("Training model: {}".format(name(sys.argv)))

    # Run training and store the final model
    model, end_states, victories, games = train(hof, num_batches, games_per_batch, epsilon, Model())
    save_model(model, sys.argv)

    # Create data plots (preferably this should live in plot.py)
    plt.figure()

    plt.subplot(3, 1, 1)
    plot_wins(end_states, 100)

    plt.subplot(3, 1, 2)
    plot_wins(victories, 100, ["Best", "HOF"])

    plt.subplot(3, 1, 3)
    hof.sample_histogram(20)

    plt.savefig("plots/plot{}.png".format(num_batches * games_per_batch))

    print("Calculating winrate matrix")
    hof.winrate_matrix(150)
    plt.show()

    # Can be used after looking at the plots to analyze important milestones
    # (could be moved into its own function or a separate file)
    ind = 0
    while ind != -1:
        ind = int(input("Query a game: "))
        for move in games[ind]:
            print(move)


def train(hof, num_batches, games_per_batch, epsilon, model):
    end_states = []
    victories = []
    games = []

    # Initialize hall of fame
    hof.store(model)

    try:
        for batch_number in range(num_batches):
            print("Batch:", batch_number, "(Games {}-{})".format(batch_number * games_per_batch + 1, (batch_number + 1) * games_per_batch))

            # Run a batch of games, after which a diagnostic game is played/saved to check for
            # improvement and the current model is stored in the hall of fame
            for game in range(games_per_batch):
                # Randomly assign sides (X or O) for the game to be played
                side_best = [-1, 1][random.random() > 0.5]
                side_hof = side_best * -1
                model_hof = hof.sample("uniform")

                # Initialize the agents
                agent_best = Agent(model, side_best)
                agent_hof = Agent(model_hof, side_hof)

                # Play the game and train on its outcome
                run_game(agent_best, agent_hof, epsilon, training=True)

            # The gate decides whether the model is worthy and stores it in the HOF only if it is
            # (currently it simply stores every model)
            hof.gate(model)

            # Switch sides and resample the HOF so the diagnostic is not biased towards the last game played
            side_best *= -1
            side_hof = side_best * -1

            model_hof = hof.sample("uniform")
            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, side_hof)

            # Run a diagnostic (non-training, no-exploration) game to collect data
            diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False, mnk=mnk)

            # Store data from this batch's diagnostic game
            games.append(game_data)
            end_states.append(diagnostic_winner)
            victories.append(diagnostic_winner * side_best)

    except KeyboardInterrupt:
        print("Training interrupted.")

    print("Training completed.")

    return model, end_states, victories, games


if __name__ == "__main__":
    main()