-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtrain.py
98 lines (71 loc) · 2.51 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# TODO: PLOT LOSS CURVES
from mnk import Board
import random
import matplotlib.pyplot as plt
from agent import Agent
from model import Model
from plot import plot_wins
from hof import HOF
from utils import run_game
# Game configuration passed to HOF(...) and to run_game(..., mnk=mnk) below.
# Presumably (m, n, k) = board width, board height, marks in a row needed to
# win -- TODO confirm against mnk.Board.
mnk = (3, 3, 3)
def train(hof, loops, loop_length, epsilon, model):
    """Train ``model`` by playing it against hall-of-fame opponents.

    The model is first stored in the hall of fame. Each loop then plays
    ``loop_length`` training games, hands the model to ``hof.gate``, and
    finishes with one diagnostic game (``training=False``, epsilon 0) whose
    outcome is recorded.

    NOTE(review): this file reached review with its indentation stripped, so
    the loop nesting here is reconstructed. In particular,
    ``hof.sample("uniform")`` is assumed to run after every training game
    (not once per loop) -- confirm against the original layout.

    Args:
        hof: Hall-of-fame object; ``store``, ``sample`` and ``gate`` are
            called on it.
        loops: Number of train / gate / diagnose iterations.
        loop_length: Number of training games played per iteration.
        epsilon: Value forwarded to ``run_game`` for training games
            (the diagnostic game always uses 0).
        model: Model being trained; wrapped in a fresh ``Agent`` per game.

    Returns:
        Tuple ``(model, end_states, victories, games)``. Each list has one
        entry per loop: ``end_states`` holds the diagnostic winner as
        returned by ``run_game``, ``victories`` holds that winner multiplied
        by the side the trained model played, and ``games`` holds the
        diagnostic game data returned by ``run_game``.
    """
    end_states = []
    victories = []
    games = []

    # Seed the hall of fame with the starting model and draw a first opponent.
    hof.store(model)
    model_hof = hof.sample()

    for loop in range(loops):
        print("\n loop: ", loop)

        for _ in range(loop_length):
            # Each game draws sides independently at random; the opponent
            # always takes the opposite side.
            side_best = random.choice((-1, 1))
            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, -side_best)

            # Pass the board configuration explicitly so training games use
            # the same setup as the diagnostic game instead of relying on
            # run_game's default.
            run_game(agent_best, agent_hof, epsilon, training=True, mnk=mnk)

            # Draw the opponent for the next game (after the last training
            # game this becomes the diagnostic opponent).
            model_hof = hof.sample("uniform")

        # Offer the current model to the hall of fame.
        hof.gate(model)

        # Run a diagnostic (non-training, no exploration) game to collect data
        side_best = random.choice((-1, 1))
        agent_best = Agent(model, side_best)
        agent_hof = Agent(model_hof, -side_best)
        diagnostic_winner, game_data = run_game(
            agent_best, agent_hof, 0, training=False, mnk=mnk
        )

        # Store data from loop
        games.append(game_data)
        end_states.append(diagnostic_winner)
        # Winner times the trained model's side; presumably +1 when the
        # trained model won and -1 when the opponent did -- verify against
        # run_game's return convention.
        victories.append(diagnostic_winner * side_best)

    return model, end_states, victories, games
if __name__ == "__main__":
    # Initialize hall of fame
    hof = HOF(mnk, "menagerie")

    num_loops = 20000
    loop_length = 5

    # Run training; keep the final model and the per-loop diagnostic data
    model, end_states, victories, games = train(hof, num_loops, loop_length, 0.2, Model())

    print("Training complete.")
    print("Saving trained model to models/modelXO and chart to plots folder")
    model.save_to('models/modelXO')

    # Create data plots: three stacked panels in a single figure
    plt.figure()

    plt.subplot(3, 1, 1)
    plot_wins(end_states, 100)

    plt.subplot(3, 1, 2)
    plot_wins(victories, 100, ["Best", "HOF"])

    plt.subplot(3, 1, 3)
    hof.sample_histogram(20)

    # The file name encodes the total number of training games played.
    plt.savefig("plots/plot{}.png".format(num_loops * loop_length))

    print("Calculating winrate matrix")
    hof.winrate_matrix(150)
    plt.show()

    # Interactive replay of recorded diagnostic games. -1 is the exit
    # sentinel, so it is checked *before* indexing; otherwise quitting would
    # first print games[-1] (the last game).
    while True:
        try:
            ind = int(input("Query a game: "))
        except ValueError:
            # A typo should not end the session after a long training run.
            print("Please enter an integer game index (-1 to quit).")
            continue
        except EOFError:
            # Input stream closed (e.g. piped stdin): nothing more to query.
            break

        if ind == -1:
            break

        try:
            game = games[ind]
        except IndexError:
            print("No such game; {} games were recorded.".format(len(games)))
            continue

        for move in game:
            print(move)