Better model naming, readability changes, and some annotations

ucfai · Feb 9, 2022 · 064cfb6 · 064cfb6
1 parent 986f310
commit 064cfb6
Show file tree

Hide file tree

Showing 18 changed files with 37 additions and 36 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
 __pycache__/
 .idea/
-menagerie/
+menagerie/
+models/
+plots/
diff --git a/agent.py b/agent.py
@@ -6,7 +6,7 @@
 
 class Agent:
 
-    def __init__(self, model, player):
+    def __init__(self, model, player):                              # Where are you using player?
         self.model = model
         self.player = player
 

diff --git a/good-models/modelXO/keras_metadata.pb b/good_models/modelXO/keras_metadata.pb
diff --git a/good-models/modelXO/saved_model.pb b/good_models/modelXO/saved_model.pb
diff --git a/good-models/modelXO/variables/variables.data-00000-of-00001 b/good_models/modelXO/variables/variables.data-00000-of-00001
diff --git a/good-models/modelXO/variables/variables.index b/good_models/modelXO/variables/variables.index
diff --git a/models/Pedro3HiddenLayersTanh50kGamesInterrupted/keras_metadata.pb b/models/Pedro3HiddenLayersTanh50kGamesInterrupted/keras_metadata.pb
diff --git a/models/Pedro3HiddenLayersTanh50kGamesInterrupted/saved_model.pb b/models/Pedro3HiddenLayersTanh50kGamesInterrupted/saved_model.pb
diff --git a/models/Pedro3HiddenLayersTanh50kGamesInterrupted/variables/variables.data-00000-of-00001 b/models/Pedro3HiddenLayersTanh50kGamesInterrupted/variables/variables.data-00000-of-00001
diff --git a/models/Pedro3HiddenLayersTanh50kGamesInterrupted/variables/variables.index b/models/Pedro3HiddenLayersTanh50kGamesInterrupted/variables/variables.index
diff --git a/models/modelXO/saved_model.pb b/models/modelXO/saved_model.pb
diff --git a/models/modelXO/variables/variables.data-00000-of-00001 b/models/modelXO/variables/variables.data-00000-of-00001
diff --git a/models/modelXO/variables/variables.index b/models/modelXO/variables/variables.index
diff --git a/play.py b/play.py
@@ -6,8 +6,8 @@
 
 board = mnk.Board(3, 3, 3, form="flatten")
 
-assert len(sys.argv) == 2, "Please specify which model you would like to play against (ex: python3 play.py 3LayersModel)"
-model = model.Model('models/' + sys.argv[1])
+assert len(sys.argv) == 2, "Please specify which model you would like to play against (ex: python3 play.py models/PedrosModel)"
+model = model.Model(sys.argv[1])
 
 print("\n\n" + str(board))
 current_player = input("\nWho plays first (Me/AI)? ")

diff --git a/plot.py b/plot.py
@@ -1,14 +1,14 @@
 import matplotlib.pyplot as plt
 
 def plot_wins(game_outcomes, run_length, labels=['X', 'O']):
-    xs = []
-    os = []
+    xs = []                                                 # x_wins instead?
+    os = []                                                 # o_wins instead?
     ties = []
     num_games = len(game_outcomes)
 
     values = [0, 0, 0]
 
-    for i, w in enumerate(game_outcomes):
+    for i, w in enumerate(game_outcomes):                   # Needs better variable names
         if i < run_length:
             values[w] += 1
         else:
@@ -17,7 +17,7 @@ def plot_wins(game_outcomes, run_length, labels=['X', 'O']):
             ties.append(values[0])
 
             values[w] += 1
-            values[game_outcomes[i-run_length]] -= 1
+            values[game_outcomes[i-run_length]] -= 1        # More comments about what run_length is
 
     game = range(run_length, len(xs)+run_length)
     plt.plot(game, xs, label="{} wins".format(labels[0]))

diff --git a/plots/plot100000.png b/plots/plot100000.png
diff --git a/save_model.py b/save_model.py
@@ -0,0 +1,8 @@
+from model import Model
+import datetime
+
+def save_model(model, argv):
+    name_of_model = argv[1] if len(argv) == 2 else "Model__" + str(datetime.datetime.now())[:-7].replace(" ", "__")
+
+    print("Saving trained model to models/{}".format(name_of_model))
+    model.save_to('models/{}'.format(name_of_model))
diff --git a/train.py b/train.py
@@ -1,4 +1,3 @@
-# TODO: PLOT LOSS CURVES
 from mnk import Board
 import random
 import matplotlib.pyplot as plt
@@ -7,26 +6,24 @@
 from plot import plot_wins
 from hof import HOF
 from utils import run_game
+from save_model import save_model
+import sys
 
 mnk = (3, 3, 3)
 
-
 def main():
     # Initialize hall of fame
     hof = HOF(mnk, "menagerie")
 
-    num_loops = 20000
-    loop_length = 5
+    num_loops = 20_000                                                                # Give more meaningful name, maybe number of batches?
+    games_per_loop = 5                                                                # Same ^ (batch_size or games_per_batch)
 
     # Run training and store final model
-    model, end_states, victories, games = train(hof, num_loops, loop_length, 0.2, Model())
-
-    print("Training complete.")
-    print("Saving trained model to models/modelXO and chart to plots folder")
+    model, end_states, victories, games = train(hof, num_loops, games_per_loop, 0.2, Model())   # Magic number 0.2
 
-    model.save_to('models/modelXO')
+    save_model(model, sys.argv)
 
-    # Create data plots
+    # Create data plots                                                               # All this should be in plot.py preferably
     plt.figure()
     plt.subplot(3, 1, 1)
     plot_wins(end_states, 100)
@@ -36,21 +33,21 @@ def main():
 
     plt.subplot(3, 1, 3)
     hof.sample_histogram(20)
-    plt.savefig("plots/plot{}.png".format(num_loops * loop_length))
+    plt.savefig("plots/plot{}.png".format(num_loops * games_per_loop))
 
     print("Calculating winrate matrix")
     hof.winrate_matrix(150)
     plt.show()
 
-    ind = 0
+    ind = 0                                                                         # Put into a function or even separate file
     while ind != -1:
         ind = int(input("Query a game: "))
         for move in games[ind]:
             print(move)
         pass
 
 
-def train(hof, loops, loop_length, epsilon, model):
+def train(hof, loops, games_per_loop, epsilon, model):                              # Be consistent (loops vs num_loops above)
     end_states = []
     victories = []
     games = []
@@ -61,28 +58,28 @@ def train(hof, loops, loop_length, epsilon, model):
 
     try:
         for loop in range(loops):
-            print("\n loop: ", loop)
+            print("Batch: ", loop, "(Games {}-{})".format(loop * games_per_loop + 1, (loop + 1) * games_per_loop))
 
-            side_best = [-1, 1][random.random() > 0.5]
+            side_best = [-1, 1][random.random() > 0.5]                                          # D.R.Y.
             side_hof = side_best * -1
 
-            for game in range(loop_length):
+            for game in range(games_per_loop):                                                  # Why are we batching the games again? I forgot.
                 # Initialize the agents
                 agent_best = Agent(model, side_best)
                 agent_hof = Agent(model_hof, side_hof)
 
-                run_game(agent_best, agent_hof, epsilon, training=True)
+                run_game(agent_best, agent_hof, epsilon, training=True)                         # This logic should be at the end for readability
 
-                # Switch sides for the next game
-                side_best = [-1, 1][random.random() > 0.5]
+                # Randomly assign sides (X or O) for the next game
+                side_best = [-1, 1][random.random() > 0.5]                                      # D.R.Y.
                 side_hof = side_best * -1
 
                 model_hof = hof.sample("uniform")
 
             # Update hall of fame and sample from it for the next loop
             hof.gate(model)
 
-            side_best *= -1
+            side_best *= -1                                                                     # Why is this necessary?
             side_hof = side_best * -1
 
             agent_best = Agent(model, side_best)
@@ -96,8 +93,9 @@ def train(hof, loops, loop_length, epsilon, model):
             end_states.append(diagnostic_winner)
             victories.append(diagnostic_winner*side_best)
     except KeyboardInterrupt:
-        print("Training interrupted")
+        print("Training interrupted.")
 
+    print("Training completed.")
     return model, end_states, victories, games