Fixed plot.py + readability changes

ucfai · Feb 20, 2022 · caab566 · caab566
1 parent dbb2025
commit caab566
Show file tree

Hide file tree

Showing 11 changed files with 93 additions and 55 deletions.
diff --git a/hof.py b/hof.py
@@ -24,7 +24,7 @@ def store(self, model):
         model.save_to("{}/{}".format(self.folder, self.pop_size))
         self.hof.append(self.pop_size)
         self.pop_size += 1
-        self.basel += 1/self.pop_size**2
+        self.basel += 1 / self.pop_size**2
 
     # Gating method decides whether to add the model to the hall of fame
     def gate(self, model):
@@ -54,11 +54,12 @@ def sample(self, method='uniform'):
         return Model("{}/{}".format(self.folder, name))
 
     # Displays a histogram of the model iterations sampled from the hall of fame
-    def sample_histogram(self, num=100):
+    def sample_histogram(self, num=100):                                                # Move to plot.py?
         pyplot.hist(self.sample_history, num)
         pyplot.title("Sampling of Model Indices from HOF")
         pyplot.show()
 
+    ''' === MOVED TO PLOT.PY LMK IF I CAN DELETE IT FROM HERE ===
     # Displays a winrate matrix of the historical policies for the given player
     def winrate_matrix(self, iterations):
         matrix = []
@@ -68,12 +69,9 @@ def winrate_matrix(self, iterations):
                 model_i = Model("{}/{}".format(self.folder, self.hof[i]))
                 model_j = Model("{}/{}".format(self.folder, self.hof[j]))
 
-                value = run_game(Agent(model_i, 1), Agent(model_j, -1))
+                value = run_game(Agent(model_i, 1), Agent(model_j, -1))[0]
                 matrix[-1].append(value)
-        pyplot.imshow(matrix, cmap="bwr")
-
-
-
-
-
 
+        pyplot.imshow(matrix, cmap="bwr")
+        pyplot.imsave("plots/Matrix.png", matrix, cmap="bwr")
+    '''
diff --git a/kept_plots/plot10000.png → kept_plots/OldModel/plot10000.png b/kept_plots/plot10000.png → kept_plots/OldModel/plot10000.png
diff --git a/kept_plots/plotBestHOF100000.png → kept_plots/OldModel/plotBestHOF100000.png b/kept_plots/plotBestHOF100000.png → kept_plots/OldModel/plotBestHOF100000.png
diff --git a/kept_plots/plotXO100000.png → kept_plots/OldModel/plotXO100000.png b/kept_plots/plotXO100000.png → kept_plots/OldModel/plotXO100000.png
diff --git a/kept_plots/PedroPlotTest/HOF.png b/kept_plots/PedroPlotTest/HOF.png
diff --git a/kept_plots/PedroPlotTest/Matrix.png b/kept_plots/PedroPlotTest/Matrix.png
diff --git a/kept_plots/PedroPlotTest/Sampling.png b/kept_plots/PedroPlotTest/Sampling.png
diff --git a/kept_plots/PedroPlotTest/XO.png b/kept_plots/PedroPlotTest/XO.png
diff --git a/kept_plots/plot100.png b/kept_plots/plot100.png
diff --git a/plot.py b/plot.py
@@ -1,29 +1,86 @@
 import matplotlib.pyplot as plt
+from model import Model
+from agent import Agent
+from utils import run_game
+import random
+import os
 
-def plot_wins(game_outcomes, run_length, labels=['X', 'O']):
-    x_wins = []
-    o_wins = []
-    ties = []
-    num_games = len(game_outcomes)
+def plot_wins(outcomes, model_name, players):
 
-    values = [0, 0, 0]                                      # Needs better name / documentation
+    # We don't plot total wins for each player because the graph would always increase, making performance evaluation harder.
+    # Instead, we plot runs: how many of the previous n games were won. This way, if a model begins performing worse, its line will decrease.
 
-    for game, w in enumerate(game_outcomes):                   # Rename "w"
-        if game < run_length:
-            values[w] += 1
+    player1_wins, player2_wins, ties = [], [], []
+    run_totals = [0, 0, 0]
+    num_games = len(outcomes)
+    run_length = max(num_games // 10 , 1)
+
+    for i, outcome in enumerate(outcomes):
+        if i < run_length:
+            run_totals[outcome] += 1
         else:
-            x_wins.append(values[1])
-            o_wins.append(values[-1])
-            ties.append(values[0])
+            player1_wins.append(run_totals[1])
+            player2_wins.append(run_totals[-1])
+            ties.append(run_totals[0])
+
+            run_totals[outcome] += 1
+            run_totals[outcomes[i - run_length]] -= 1
 
-            values[w] += 1
-            values[game_outcomes[game - run_length]] -= 1        # More comments about what run_length is
+    game = range(run_length, len(player1_wins) + run_length)
 
-    game = range(run_length, len(x_wins)+run_length)
-    plt.plot(game, x_wins, label="{} wins".format(labels[0]))
-    plt.plot(game, o_wins, label="{} wins".format(labels[1]))
+    plt.plot(game, player1_wins, label="{} wins".format(players[0]))
+    plt.plot(game, player2_wins, label="{} wins".format(players[1]))
     plt.plot(game, ties, label="Ties")
+
     plt.legend()
-    plt.title("Training data for {} Games".format(num_games))
-    plt.xlabel("Game number")
+    plt.title("{}: {} diagnostic games".format(model_name, num_games))
+    plt.xlabel("Game #")
     plt.ylabel("Wins out of previous {} games".format(run_length))
+
+
+# 1v1 matrix for historical models: ideally, newer versions beat earlier ones
+def winrate_matrix(num_games, step):
+    print("Calculating winrate matrix...")
+    matrix = []
+    for i in range (0, num_games, step):
+        matrix.append([])
+        for j in range (0, num_games, step):
+            model_i = Model("menagerie/{}".format(i))
+            model_j = Model("menagerie/{}".format(j))
+
+            side_i = [-1, 1][random.random() > 0.5]
+            side_j = side_i * -1
+
+            value = run_game(Agent(model_i, side_i), Agent(model_j, side_j))[0]
+            matrix[-1].append(value)
+
+    return matrix
+
+
+def save_plots(hof, model_name, winnersXO, winnersHOF):
+
+    # Create model's plots folder
+    plots_dir = "plots/{}".format(model_name)
+    if not os.path.isdir(plots_dir):
+        os.makedirs(plots_dir)
+
+    # Graph and save each plot
+    plt.figure()
+    plot_wins(winnersXO, model_name, ['X', 'O'])
+    plt.savefig("{}/XO.png".format(plots_dir))
+    plt.clf()
+
+    plot_wins(winnersHOF, model_name, ["Best", "HOF"])
+    plt.savefig("{}/HOF.png".format(plots_dir))
+    plt.clf()
+
+    hof.sample_histogram(20)
+    plt.savefig("{}/Sampling.png".format(plots_dir))
+    plt.clf()
+
+    num_games = len(winnersXO)
+    step = max(1, num_games // 20)
+    matrix = winrate_matrix(num_games, step)
+    plt.imshow(matrix, cmap="bwr")
+    plt.imsave("plots/{}/Matrix.png".format(model_name), matrix, cmap="bwr")
+    plt.clf()
diff --git a/train.py b/train.py
@@ -3,7 +3,7 @@
 import matplotlib.pyplot as plt
 from agent import Agent
 from model import Model
-from plot import plot_wins
+from plot import plot_wins, save_plots
 from hof import HOF
 from utils import run_game, arg_parser
 from save_model import save_model
@@ -23,30 +23,13 @@ def main():
     hof = HOF(mnk, folder="menagerie")
 
     print("\nTraining model: {}\n".format(model_name))
-
-    # Run training and store final model
-    model, end_states, victories, games = train(hof, num_batches, games_per_batch, epsilon, Model())
+    model, winnersXO, winnersHOF, games = train(hof, num_batches, games_per_batch, epsilon, Model())
 
     save_model(model, model_name)
-
-    # Create data plots                                                              # All this should be in plot.py preferably
-    plt.figure()
-    plt.subplot(3, 1, 1)
-    plot_wins(end_states, 100)
-
-    plt.subplot(3, 1, 2)
-    plot_wins(victories, 100, ["Best", "HOF"])
-
-    plt.subplot(3, 1, 3)
-    hof.sample_histogram(20)
-    plt.savefig("plots/plot{}.png".format(num_batches * games_per_batch))
-
-    print("Calculating winrate matrix")
-    hof.winrate_matrix(150)
-    plt.show()
+    save_plots(hof, model_name, winnersXO, winnersHOF)
 
     # Can be used after looking at plot to analyze important milestones
-    ind = 0                                                                          # Put into a function or even separate file
+    ind = 0                                                                          # Put into a function
     while ind != -1:
         ind = int(input("Query a game: "))
         for move in games[ind]:
@@ -55,16 +38,16 @@ def main():
 
 
 def train(hof, num_batches, games_per_batch, epsilon, model):
-    end_states = []
-    victories = []
+    winnersXO = []
+    winnersHOF = []
     games = []
 
     # Initialize hall of fame
     hof.store(model)
 
     try:
         for batch_number in range(num_batches):
-            print("Batch: ", batch_number, "(Games {}-{})".format(batch_number * games_per_batch + 1, (batch_number + 1) * games_per_batch))
+            print("Batch:", batch_number, "(Games {}-{})".format(batch_number * games_per_batch + 1, (batch_number + 1) * games_per_batch))
 
             # Runs a batch of games, after which we can play/save a diagnostic game to see if it improved and store current model to hof
             for game in range(games_per_batch):
@@ -97,16 +80,16 @@ def train(hof, num_batches, games_per_batch, epsilon, model):
 
             # Store data from diagnostic game for this batch
             games.append(game_data)
-            end_states.append(diagnostic_winner)
-            victories.append(diagnostic_winner*side_best)
+            winnersXO.append(diagnostic_winner)            # X or O
+            winnersHOF.append(diagnostic_winner*side_best)   # Best or HOF
 
     except KeyboardInterrupt:
         print("\n=======================")
         print("Training interrupted.")
         print("=======================")
 
     print("Training completed.")
-    return model, end_states, victories, games
+    return model, winnersXO, winnersHOF, games
 
 
 if __name__ == "__main__":