Skip to content

Commit

Permalink
Better model naming, readability changes, and some annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
PedroContipelli committed Feb 9, 2022
1 parent 986f310 commit 064cfb6
Show file tree
Hide file tree
Showing 18 changed files with 37 additions and 36 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
__pycache__/
.idea/
menagerie/
menagerie/
models/
plots/
2 changes: 1 addition & 1 deletion agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class Agent:

def __init__(self, model, player):
def __init__(self, model, player): # Where are you using player?
self.model = model
self.player = player

Expand Down
File renamed without changes.
File renamed without changes.

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified models/modelXO/saved_model.pb
Binary file not shown.
Binary file modified models/modelXO/variables/variables.data-00000-of-00001
Binary file not shown.
Binary file modified models/modelXO/variables/variables.index
Binary file not shown.
4 changes: 2 additions & 2 deletions play.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

board = mnk.Board(3, 3, 3, form="flatten")

assert len(sys.argv) == 2, "Please specify which model you would like to play against (ex: python3 play.py 3LayersModel)"
model = model.Model('models/' + sys.argv[1])
assert len(sys.argv) == 2, "Please specify which model you would like to play against (ex: python3 play.py models/PedrosModel)"
model = model.Model(sys.argv[1])

print("\n\n" + str(board))
current_player = input("\nWho plays first (Me/AI)? ")
Expand Down
8 changes: 4 additions & 4 deletions plot.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import matplotlib.pyplot as plt

def plot_wins(game_outcomes, run_length, labels=['X', 'O']):
xs = []
os = []
xs = [] # x_wins instead?
os = [] # o_wins instead?
ties = []
num_games = len(game_outcomes)

values = [0, 0, 0]

for i, w in enumerate(game_outcomes):
for i, w in enumerate(game_outcomes): # Needs better variable names
if i < run_length:
values[w] += 1
else:
Expand All @@ -17,7 +17,7 @@ def plot_wins(game_outcomes, run_length, labels=['X', 'O']):
ties.append(values[0])

values[w] += 1
values[game_outcomes[i-run_length]] -= 1
values[game_outcomes[i-run_length]] -= 1 # More comments about what run_length is

game = range(run_length, len(xs)+run_length)
plt.plot(game, xs, label="{} wins".format(labels[0]))
Expand Down
Binary file modified plots/plot100000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions save_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from model import Model
import datetime

def save_model(model, argv):
    """Save a trained model under the ``models/`` directory.

    Args:
        model: Object exposing a ``save_to(path)`` method.
        argv: Command-line argument list (the caller passes ``sys.argv``).
            When it has exactly two entries, ``argv[1]`` is used as the
            model name; otherwise a timestamped name of the form
            ``Model__YYYY-MM-DD__HH:MM:SS`` is generated.
    """
    if len(argv) == 2:
        name_of_model = argv[1]
    else:
        # Format the timestamp explicitly. Slicing ``str(now)[:-7]`` to drop
        # the microseconds breaks when ``microsecond == 0``, because ``str()``
        # then omits the fractional part and the slice eats into the time.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d__%H:%M:%S")
        name_of_model = "Model__" + timestamp

    save_path = "models/{}".format(name_of_model)
    print("Saving trained model to {}".format(save_path))
    model.save_to(save_path)
40 changes: 19 additions & 21 deletions train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# TODO: PLOT LOSS CURVES
from mnk import Board
import random
import matplotlib.pyplot as plt
Expand All @@ -7,26 +6,24 @@
from plot import plot_wins
from hof import HOF
from utils import run_game
from save_model import save_model
import sys

mnk = (3, 3, 3)


def main():
# Initialize hall of fame
hof = HOF(mnk, "menagerie")

num_loops = 20000
loop_length = 5
num_loops = 20_000 # Give more meaningful name, maybe number of batches?
games_per_loop = 5 # Same ^ (batch_size or games_per_batch)

# Run training and store final model
model, end_states, victories, games = train(hof, num_loops, loop_length, 0.2, Model())

print("Training complete.")
print("Saving trained model to models/modelXO and chart to plots folder")
model, end_states, victories, games = train(hof, num_loops, games_per_loop, 0.2, Model()) # Magic number 0.2

model.save_to('models/modelXO')
save_model(model, sys.argv)

# Create data plots
# Create data plots # All this should be in plot.py preferably
plt.figure()
plt.subplot(3, 1, 1)
plot_wins(end_states, 100)
Expand All @@ -36,21 +33,21 @@ def main():

plt.subplot(3, 1, 3)
hof.sample_histogram(20)
plt.savefig("plots/plot{}.png".format(num_loops * loop_length))
plt.savefig("plots/plot{}.png".format(num_loops * games_per_loop))

print("Calculating winrate matrix")
hof.winrate_matrix(150)
plt.show()

ind = 0
ind = 0 # Put into a function or even separate file
while ind != -1:
ind = int(input("Query a game: "))
for move in games[ind]:
print(move)
pass


def train(hof, loops, loop_length, epsilon, model):
def train(hof, loops, games_per_loop, epsilon, model): # Be consistent (loops vs num_loops above)
end_states = []
victories = []
games = []
Expand All @@ -61,28 +58,28 @@ def train(hof, loops, loop_length, epsilon, model):

try:
for loop in range(loops):
print("\n loop: ", loop)
print("Batch: ", loop, "(Games {}-{})".format(loop * games_per_loop + 1, (loop + 1) * games_per_loop))

side_best = [-1, 1][random.random() > 0.5]
side_best = [-1, 1][random.random() > 0.5] # D.R.Y.
side_hof = side_best * -1

for game in range(loop_length):
for game in range(games_per_loop): # Why are we batching the games again? I forgot.
# Initialize the agents
agent_best = Agent(model, side_best)
agent_hof = Agent(model_hof, side_hof)

run_game(agent_best, agent_hof, epsilon, training=True)
run_game(agent_best, agent_hof, epsilon, training=True) # This logic should be at the end for readability

# Switch sides for the next game
side_best = [-1, 1][random.random() > 0.5]
# Randomly assign sides (X or O) for the next game
side_best = [-1, 1][random.random() > 0.5] # D.R.Y.
side_hof = side_best * -1

model_hof = hof.sample("uniform")

# Update hall of fame and sample from it for the next loop
hof.gate(model)

side_best *= -1
side_best *= -1 # Why is this necessary?
side_hof = side_best * -1

agent_best = Agent(model, side_best)
Expand All @@ -96,8 +93,9 @@ def train(hof, loops, loop_length, epsilon, model):
end_states.append(diagnostic_winner)
victories.append(diagnostic_winner*side_best)
except KeyboardInterrupt:
print("Training interrupted")
print("Training interrupted.")

print("Training completed.")
return model, end_states, victories, games


Expand Down

0 comments on commit 064cfb6

Please sign in to comment.