Skip to content

Commit

Permalink
See last commit message (woops)
Browse files Browse the repository at this point in the history
  • Loading branch information
fshcat committed Feb 9, 2022
1 parent b93bd9e commit 986f310
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 78 deletions.
23 changes: 1 addition & 22 deletions mnk.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,28 +83,7 @@ def num_legal_moves(self):

# Reshapes board into the form needed for the model
def get_board(self):
    """Return the board in the representation selected by self.form.

    NOTE(review): this hunk is a diff overlay — the named-form branches below
    were removed by this commit and the bare tuple return was added, with the
    reshaping moved to state_representation.get_input_rep. Documented here as
    the combined text reads; confirm against the post-commit file.
    """
    if self.form == "flatten":
        # 1 x 1 x (m*n) row vector
        return np.copy(self.board.reshape(1, 1, self.m * self.n))
    elif self.form == "planar":
        # 1 x m x n x 1 single-channel plane
        return np.copy(self.board.reshape(1, self.m, self.n, 1))
    elif self.form == "multiplanar":
        # Two binary planes: channel 0 marks +1 stones, channel 1 marks -1 stones
        board_planes = np.zeros((self.m, self.n, 2), dtype=int)
        for i in range(self.m):
            for j in range(self.n):
                if self.board[i][j] == 1:
                    board_planes[i][j][0] = 1
                elif self.board[i][j] == -1:
                    board_planes[i][j][1] = 1
        return np.copy(board_planes.reshape(1, self.m, self.n, 2))
    elif self.form == "multiplanar-turnflipped":
        # Same two planes, but channel 0 is the player to move, channel 1 the opponent
        board_planes = np.zeros((self.m, self.n, 2), dtype=int)
        for i in range(self.m):
            for j in range(self.n):
                if self.board[i][j] == self.player:
                    board_planes[i][j][0] = 1
                elif self.board[i][j] == -1*self.player:
                    board_planes[i][j][1] = 1
        return np.copy(board_planes.reshape(1, self.m, self.n, 2))
    # Fall-through: raw (board, player) tuple, as consumed by get_input_rep
    # in model.py (see the get_input_rep(board.get_board()) call sites).
    return (self.board, self.player)

def game_ongoing(self):
return not (self.player_has_lost() or (self.num_legal_moves() == 0))
Expand Down
9 changes: 5 additions & 4 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.optimizers import SGD
from state_representation import get_input_rep


class Model:
Expand Down Expand Up @@ -35,7 +36,7 @@ def raw_value(self, board):
if board.who_won() != 2:
return tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
else:
return board.player*self.model(board.get_board())
return board.player*self.model(get_input_rep(board.get_board()))

def raw_action_value(self, board, move):
board.move(*move)
Expand All @@ -53,7 +54,7 @@ def state_value(self, board):
elif board.who_won() == -1*board.player:
return tf.constant(-1, dtype="float32", shape=(1, 1))
else:
return self.model(board.get_board())
return self.model(get_input_rep(board.get_board()))

# Returns the value of taking a move from the given board state
def action_value(self, board, move):
Expand Down Expand Up @@ -83,7 +84,7 @@ def td_update(self, board, greedy_move=None, terminal=False):
if terminal:
assert board.who_won() != 2
assert greedy_move is None
self.model.fit(board.history()[-2], self.state_value(board), batch_size=1, verbose=0, callbacks=[callback])
self.model.fit(get_input_rep(board.history()[-2]), self.state_value(board), batch_size=1, verbose=0, callbacks=[callback])
else:
self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0, callbacks=[callback])
self.model.fit(get_input_rep(board.history()[-2]), self.action_value(board, greedy_move), batch_size=1, verbose=0, callbacks=[callback])

9 changes: 6 additions & 3 deletions state_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@


# Reshapes board into the form needed for the model
def get_input_rep(self, board, form):
def get_input_rep(board, form="multiplanar-turnflipped"):
player = board[1]
board = board[0]
m, n = np.shape(board)

if form == "flatten":
Expand All @@ -22,11 +24,12 @@ def get_input_rep(self, board, form):
return np.copy(board_planes.reshape(1, m, n, 2))

elif form == "multiplanar-turnflipped":
assert player is not None
board_planes = np.zeros((m, n, 2), dtype=int)
for i in range(m):
for j in range(n):
if self.board[i][j] == self.player:
if board[i][j] == player:
board_planes[i][j][0] = 1
elif self.board[i][j] == -1 * self.player:
elif board[i][j] == -1 * player:
board_planes[i][j][1] = 1
return np.copy(board_planes.reshape(1, m, n, 2))
105 changes: 56 additions & 49 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,7 @@
mnk = (3, 3, 3)


def train(hof, loops, loop_length, epsilon, model):
    """Train `model` by self-play against hall-of-fame opponents.

    NOTE(review): this is the pre-commit (removed) version of train(); the
    commit replaces it with a variant wrapped in a KeyboardInterrupt handler.

    Args:
        hof: Hall-of-fame object (provides store/sample/gate) holding past models.
        loops: Number of outer training loops.
        loop_length: Number of training games played per loop.
        epsilon: Exploration rate passed to run_game for training games.
        model: The model being trained.

    Returns:
        (model, end_states, victories, games): the trained model plus, per
        loop, the diagnostic game's winner, that winner multiplied by the
        trained model's side, and the diagnostic game's move data.
    """
    end_states = []
    victories = []
    games = []

    # Initialize values: seed the hall of fame with the current model
    # and draw the first opponent snapshot from it.
    hof.store(model)
    model_hof = hof.sample()

    for loop in range(loops):
        print("\n loop: ", loop)

        # Randomly assign +1/-1 sides; the opponent takes the other side.
        side_best = [-1, 1][random.random() > 0.5]
        side_hof = side_best * -1

        for game in range(loop_length):
            # Initialize the agents
            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, side_hof)

            run_game(agent_best, agent_hof, epsilon, training=True)

            # Re-randomize sides for the next game (not a strict alternation).
            side_best = [-1, 1][random.random() > 0.5]
            side_hof = side_best * -1

            model_hof = hof.sample("uniform")

        # Update hall of fame and sample from it for the next loop
        hof.gate(model)

        # Flip sides once more for the diagnostic game.
        side_best *= -1
        side_hof = side_best * -1

        agent_best = Agent(model, side_best)
        agent_hof = Agent(model_hof, side_hof)

        # Run a diagnostic (non-training, no exploration) game to collect data
        diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False, mnk=mnk)

        # Store data from loop
        games.append(game_data)
        end_states.append(diagnostic_winner)
        victories.append(diagnostic_winner*side_best)

    return model, end_states, victories, games


if __name__ == "__main__":
def main():
# Initialize hall of fame
hof = HOF(mnk, "menagerie")

Expand Down Expand Up @@ -96,3 +48,58 @@ def train(hof, loops, loop_length, epsilon, model):
for move in games[ind]:
print(move)
pass


def train(hof, loops, loop_length, epsilon, model):
    """Train `model` by self-play against hall-of-fame opponents.

    Args:
        hof: Hall-of-fame object (provides store/sample/gate) holding past models.
        loops: Number of outer training loops.
        loop_length: Number of training games played per loop.
        epsilon: Exploration rate passed to run_game for training games.
        model: The model being trained.

    Returns:
        (model, end_states, victories, games): the trained model plus, per
        loop, the diagnostic game's winner, that winner multiplied by the
        trained model's side, and the diagnostic game's move data. A
        KeyboardInterrupt stops training early and returns the data
        collected so far.
    """
    end_states = []
    victories = []
    games = []

    # Initialize values: seed the hall of fame with the current model
    # and draw the first opponent snapshot from it.
    hof.store(model)
    model_hof = hof.sample()

    try:
        for loop in range(loops):
            print("\n loop: ", loop)

            # Randomly assign +1/-1 sides; the opponent takes the other side.
            # (random.choice is the idiomatic form of [-1, 1][random.random() > 0.5].)
            side_best = random.choice((-1, 1))
            side_hof = -side_best

            for game in range(loop_length):
                # Initialize the agents
                agent_best = Agent(model, side_best)
                agent_hof = Agent(model_hof, side_hof)

                run_game(agent_best, agent_hof, epsilon, training=True)

                # Re-randomize sides for the next game (not a strict alternation).
                side_best = random.choice((-1, 1))
                side_hof = -side_best

                model_hof = hof.sample("uniform")

            # Update hall of fame and sample from it for the next loop
            hof.gate(model)

            # Flip sides once more for the diagnostic game.
            side_best = -side_best
            side_hof = -side_best

            agent_best = Agent(model, side_best)
            agent_hof = Agent(model_hof, side_hof)

            # Run a diagnostic (non-training, no exploration) game to collect data
            diagnostic_winner, game_data = run_game(agent_best, agent_hof, 0, training=False, mnk=mnk)

            # Store data from loop
            games.append(game_data)
            end_states.append(diagnostic_winner)
            victories.append(diagnostic_winner * side_best)
    except KeyboardInterrupt:
        # Allow Ctrl-C to end training gracefully, keeping data gathered so far.
        print("Training interrupted")

    return model, end_states, victories, games


# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    main()

0 comments on commit 986f310

Please sign in to comment.