Added Hall of Fame functionality
fshcat committed Nov 7, 2021
1 parent 23644e8 commit ff983d9
Showing 12 changed files with 143 additions and 51 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,2 +1,3 @@
__pycache__/
.idea/
.idea/
menagerie/
14 changes: 7 additions & 7 deletions agent.py
@@ -2,12 +2,13 @@
import keras.models
import random

class Agent():

class Agent:

def __init__(self, board, model, player):
self.board = board;
self.model = model;
self.player = player;
self.board = board
self.model = model
self.player = player

def action(self, epsilon=0.01):
legal_moves = self.board.legal_moves()
@@ -19,17 +20,16 @@ def action(self, epsilon=0.01):
self.board.move(*legal_moves[random.randint(0, len(legal_moves) - 1)])
return

best_move = legal_moves[0]
best_move = legal_moves[-1]
max_evaluation = -1

for move in legal_moves:
self.board.move(*move)
evaluation = self.player * self.model(self.board.get_board())
if evaluation > max_evaluation:
best_move = move
max_evaluation = evaluation
self.board.undo_move(*move)

self.board.undo_move(*move)
self.board.move(*best_move)
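For context, the cleaned-up Agent is driven the same way play.py drives it: build a board, load a saved model, and ask the agent for a move. A minimal sketch, assuming the saved model at models/modelXO from this commit:

import mnk
import tensorflow as tf
from agent import Agent

# mirrors play.py: an unflattened board is fed to the model as a (1, 3, 3, 1) tensor
board = mnk.Board(3, 3, 3, flatten=False)
model = tf.keras.models.load_model('models/modelXO')

agent = Agent(board, model, 1)  # the agent plays as X (player = 1)
agent.action(epsilon=0)         # epsilon=0: always pick the highest-valued legal move
print(board)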


23 changes: 23 additions & 0 deletions hof.py
@@ -0,0 +1,23 @@
import random
import tensorflow as tf
from math import floor
import os


class HOF:
def __init__(self, folder):
self.hof = []
self.folder = folder
if not os.path.isdir(folder):
os.makedirs(folder)

def store(self, model, name):
model.save("{}/{}".format(self.folder, name))
self.hof.append(name)

def sample_hof(self):
pop_size = len(self.hof)
ind = floor(pop_size*random.random())
name = self.hof[ind]
return tf.keras.models.load_model("{}/{}".format(self.folder, name))
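Usage mirrors train.py: a model is saved to the hall-of-fame folder under a name, and a uniformly random past snapshot can be loaded back as an opponent. A minimal sketch:

from hof import HOF
from model import modelXO

hof = HOF("menagerie")        # creates the menagerie/ folder if it is missing
hof.store(modelXO, "init")    # saves to menagerie/init and records the name
opponent = hof.sample_hof()   # loads a uniformly random stored model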

8 changes: 4 additions & 4 deletions mnk.py
@@ -2,7 +2,7 @@
# Future games should share a similar interface to make conversion of the AI
# to play different games as seamless as possible.
import numpy as np
import random


class Board:
def __init__(self, m, n, k, flatten=True, hist_length=-1):
@@ -34,7 +34,7 @@ def del_history(self):
if self.hist_length == -1 or len(self.board_history) < self.hist_length:
self.board_history.pop()
else:
for i in range(0,len(self.board_history)-1,-1):
for i in range(0,len(self.board_history)-1, -1):
self.board_history[i+1] = self.board_history[i]
self.board_history[0] = self.undo_buffer
self.undo_buffer = np.zeros((self.m, self.n), dtype=int)
@@ -79,9 +79,9 @@ def legal_moves(self):
# reshapes board into 1-dimensional array for feeding as input to model if flatten is True
def get_board(self):
if self.flatten:
return self.board.reshape(1, self.m * self.n)
return np.copy(self.board.reshape(1, self.m * self.n))
else:
return self.board
return np.copy(self.board.reshape(1, 3, 3, 1))

# converting numbers to their respective game values
@staticmethod
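The reworked get_board now returns a copy of the position, reshaped to (1, 3, 3, 1) when flatten=False so it can be fed straight to the convolutional model, or to (1, 9) for the flattened dense input. A quick sketch of the two shapes, assuming a 3,3,3 board as used everywhere in this commit:

import mnk

conv_board = mnk.Board(3, 3, 3, flatten=False)
print(conv_board.get_board().shape)   # (1, 3, 3, 1), ready for the Conv2D model

flat_board = mnk.Board(3, 3, 3)       # flatten defaults to True
print(flat_board.get_board().shape)   # (1, 9)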
21 changes: 13 additions & 8 deletions model.py
@@ -1,17 +1,22 @@
import mnk
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from keras.layers import Dense, Conv2D, Flatten, Dropout
from tensorflow.keras.optimizers import Adadelta

learning_rate = 0.005
momentum = 0.8
sgd = SGD(lr=learning_rate, momentum=momentum, nesterov=False)
learning_rate = 1.0
rho = 0.995
epsilon = 1e-07
sgd = Adadelta(lr=learning_rate, rho=rho, epsilon=epsilon)

modelXO = Sequential()
modelXO.add(Dense(6, input_dim=9, kernel_initializer='normal', activation='relu'))
modelXO.add(Dense(6, input_dim=9, kernel_initializer='normal', activation='relu'))
modelXO.add(Conv2D(12, 3, padding="same", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
modelXO.add(Dropout(0.1))
modelXO.add(Conv2D(9, 2, padding="valid", input_shape=(3, 3, 1), activation='tanh', kernel_initializer="he_normal"))
modelXO.add(Dropout(0.1))
modelXO.add(Flatten())
modelXO.add(Dense(18, kernel_initializer='normal', activation='tanh'))
modelXO.add(Dense(1, kernel_initializer='normal', activation='tanh'))

modelXO.compile(loss='mean_squared_error', optimizer = sgd)
modelXO.compile(loss='mean_squared_error', optimizer=sgd)
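The new network is a small convolutional evaluator: it consumes the (1, 3, 3, 1) tensors produced by Board.get_board(flatten=False) and emits a single tanh score in [-1, 1]. A quick shape check, as a sketch:

import numpy as np
from model import modelXO

empty = np.zeros((1, 3, 3, 1), dtype="float32")  # an empty board
print(modelXO(empty).shape)                      # (1, 1): one position evaluation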

18 changes: 13 additions & 5 deletions models/modelXO/keras_metadata.pb

Large diffs are not rendered by default.

Binary file modified models/modelXO/saved_model.pb
Binary file modified models/modelXO/variables/variables.data-00000-of-00001
Binary file modified models/modelXO/variables/variables.index
7 changes: 2 additions & 5 deletions play.py
@@ -2,17 +2,15 @@
import mnk
import tensorflow as tf

board = mnk.Board(3, 3, 3)
board = mnk.Board(3, 3, 3, flatten=False)
model = tf.keras.models.load_model('models/modelXO')

print("\n\n" + str(board))
current_player = input("\nWho plays first (Me/AI)? ")

ai_side = [-1, 1][current_player == "AI"]
agent = Agent(board, model, ai_side)

while board.who_won() == 2:

if current_player == 'Me':
played = False
while not played:
@@ -25,12 +23,11 @@
played = True
except:
print("Invalid move! Try again")

current_player = "AI"

else:
agent.action(0)
current_player = "Me"

print(board)

if board.who_won() == 0:
28 changes: 28 additions & 0 deletions plot.py
@@ -0,0 +1,28 @@
from matplotlib import pyplot


def plot_wins(win_states, num, labels=['X', 'O']):
xs = []
os = []
ties = []

values = [0, 0, 0]

for i, w in enumerate(win_states):
if i < num:
values[w] += 1
else:
xs.append(values[1])
os.append(values[-1])
ties.append(values[0])

values[w] += 1
values[win_states[i-num]] -= 1

game = range(num, len(xs)+num)
pyplot.plot(game, xs, label="{} wins".format(labels[0]))
pyplot.plot(game, os, label="{} wins".format(labels[1]))
pyplot.plot(game, ties, label="Ties")
pyplot.legend()
pyplot.title("Number of Each End State for Previous {} Games".format(num))
pyplot.show()
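plot_wins expects a sequence of end states coded as 1, -1, or 0 and, for each game from index num onward, plots how many of the previous num games ended in each state. train.py calls it with the HOF-relative results; a small sketch with made-up outcomes:

import random
from plot import plot_wins

# hypothetical results: 1 = first label wins, -1 = second label wins, 0 = tie
results = [random.choice([1, -1, 0]) for _ in range(200)]
plot_wins(results, 50, labels=["Best", "HOF"])   # rolling counts over the last 50 games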
72 changes: 51 additions & 21 deletions train.py
@@ -1,47 +1,77 @@
# TODO: PLOT LOSS CURVES
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import numpy as np
import mnk
import random
from agent import Agent
from model import modelXO
from plot import plot_wins
from hof import HOF


games = 1010
m, n, k = 3, 3, 3
epsilon = 1
numEpochs = 1
batchSize = 1
verbose = 0
hof = HOF("menagerie")
hof.store(modelXO, "init")
modelHOF = hof.sample_hof()

hof_freq = 10 # how often to save the model to the HOF
hof_duration = 2 # how long to keep using the same HOF model before loading a new one

games = 1000
epsilon = 0.1 # exploration constant
decay_freq = 10 # how often to decrease epsilon
decay_factor = 0.00099 # how much to decrease by

end_states = []
victories = []
stored_games = []

for game in range(games):
board = mnk.Board(m, n, k, hist_length=2)
board = mnk.Board(m, n, k, flatten=False, hist_length=-1)

# decrease exploration over time
if game % decay_freq == 0 and game != 0:
epsilon -= decay_factor

agentX = Agent(board, modelXO, 1)
agentO = Agent(board, modelXO, -1)
# initialize the agents
if game % hof_duration == 0 and game != 0:
modelHOF = hof.sample_hof()
sideT = [-1, 1][random.random() > 0.5]
sideHOF = [None, -1, 1][sideT]
agentT = Agent(board, modelXO, sideT)
agentHOF = Agent(board, modelHOF, sideHOF)

move = 1
while not board.player_has_lost() and len(board.legal_moves()) != 0:
# have the appropriate agent select a move
if board.player == sideHOF:
agentHOF.action(epsilon)
else:
agentT.action(epsilon)

# back up the current board evaluation to the last action chosen by the current agent
if move > 2:
evaluation = modelXO(board.get_board())
modelXO.fit(board.history()[-2], evaluation, epochs=numEpochs, batch_size=batchSize, verbose=0)

if board.player == 1:
agentX.action(epsilon)
else:
agentO.action(epsilon)
modelXO.fit(board.history()[-3], evaluation, batch_size=1, verbose=0)
move += 1

if game % 50 == 0:
print(board)
move += 1

# back up the terminal state value to the last actions chosen by either agent
terminal_eval = tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
modelXO.fit(board.history()[-2], terminal_eval, epochs=numEpochs, batch_size=batchSize, verbose=0)
modelXO.fit(board.history()[-1], terminal_eval, epochs=numEpochs, batch_size=batchSize, verbose=0)
modelXO.fit(board.history()[-3], terminal_eval, batch_size=1, verbose=0)
modelXO.fit(board.history()[-2], terminal_eval, batch_size=1, verbose=0)

if game % 300 == 0:
epsilon /= 10
# occasionally save new model to hall of fame
if game % hof_freq == 0 and game != 0:
hof.store(modelXO, game)

end_states.append(board.who_won())
victories.append(board.who_won()*sideT)
if game % 10 == 0:
print("Game " + str(game) + " goes to " + ['Tie', 'X', 'O'][board.who_won()])
print("Game {} goes to {} ({})".format(str(game), ["tie", "best", "hof"][board.who_won()*sideT], ['Tie', 'X', 'O'][board.who_won()]))

plot_wins(end_states, 50)
plot_wins(victories, 50, ["Best", "HOF"])
modelXO.save('models/modelXO')
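One consequence of the new schedule worth noting: epsilon now decays linearly (by decay_factor every decay_freq games) instead of dropping by a factor of 10 every 300 games, so over the 1000-game run it slides from 0.1 to roughly 0.002. A quick check of that arithmetic:

epsilon, decay_freq, decay_factor = 0.1, 10, 0.00099
decays = sum(1 for game in range(1000) if game % decay_freq == 0 and game != 0)
print(decays, epsilon - decays * decay_factor)   # 99 decays -> about 0.00199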
