Added new data plot, added board input format

ucfai · Nov 23, 2021 · 7f5ec92 · 7f5ec92
1 parent 9869cac
commit 7f5ec92
Show file tree

Hide file tree

Showing 21 changed files with 132 additions and 41 deletions.
diff --git a/agent.py b/agent.py
@@ -29,7 +29,7 @@ def random_action(self, board):
         legal_moves = board.legal_moves()
         return legal_moves[random.randint(0, len(legal_moves) - 1)]
 
-    def action(self, board, training, epsilon=0):
+    def action(self, board, training=False, epsilon=0):
         legal_moves = board.legal_moves()
         assert len(legal_moves) > 0, "No legal moves can be played."
 

diff --git a/good-models/modelXO/keras_metadata.pb b/good-models/modelXO/keras_metadata.pb
@@ -0,0 +1,9 @@
+
+�,root"_tf_keras_sequential*�,{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "conv2d_input"}}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "valid", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "dtype": "float32", "units": 27, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "tanh", "use_bias": true, 
"kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "shared_object_id": 11, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 2}}, "shared_object_id": 12}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 3, 3, 2]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 3, 3, 2]}, "float32", "conv2d_input"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 3, 3, 2]}, "float32", "conv2d_input"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "conv2d_input"}, "shared_object_id": 0}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "valid", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, 
"shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3}, {"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}, "shared_object_id": 4}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "dtype": "float32", "units": 27, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 7}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "tanh", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 10}]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "SGD", "config": {"name": "SGD", "learning_rate": 0.009999999776482582, "decay": 0.0, "momentum": 0.0, "nesterov": false}}}}2
+�
+root.layer_with_weights-0"_tf_keras_layer*�
+{"name": "conv2d", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "stateful": false, "must_restore_from_config": false, "class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3, 3, 2]}, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "valid", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 2}}, "shared_object_id": 12}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 3, 3, 2]}}2
+�root.layer-1"_tf_keras_layer*�{"name": "flatten", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "dtype": "float32", "data_format": "channels_last"}, "shared_object_id": 4, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 1, "axes": {}}, "shared_object_id": 13}}2
+�root.layer_with_weights-1"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 1, 18]}, "dtype": "float32", "units": 27, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 7, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 48}}, "shared_object_id": 14}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 48]}}2
+�root.layer_with_weights-2"_tf_keras_layer*�{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1, "activation": "tanh", "use_bias": true, "kernel_initializer": {"class_name": "RandomNormal", "config": {"mean": 0.0, "stddev": 0.05, "seed": null}, "shared_object_id": 8}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 9}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 10, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 27}}, "shared_object_id": 15}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 27]}}2
+�>root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 16}2
diff --git a/good-models/modelXO/saved_model.pb b/good-models/modelXO/saved_model.pb
diff --git a/good-models/modelXO/variables/variables.data-00000-of-00001 b/good-models/modelXO/variables/variables.data-00000-of-00001
diff --git a/good-models/modelXO/variables/variables.index b/good-models/modelXO/variables/variables.index
diff --git a/hof.py b/hof.py
@@ -4,12 +4,15 @@
 import os
 from math import floor
 from model import Model
+from agent import Agent
+from mnk import Board
 
 
 class HOF:
-    def __init__(self, folder):
+    def __init__(self, mnk, folder):
         self.hof = []
         self.folder = folder
+        self.mnk = mnk
         self.sample_history = []
         self.pop_size = 0
         self.basel = 0  # Used in limit-uniform sampling
@@ -41,6 +44,8 @@ def sample(self, method='uniform'):
                     break
         elif method == 'uniform':
             ind = floor(random.random()*self.pop_size)
+        elif method == 'naive':
+            ind = self.pop_size-1
 
         self.sample_history.append(ind)
 
@@ -53,3 +58,32 @@ def sample_histogram(self, num=100):
         pyplot.title("Sampling of Model Indices from HOF")
         pyplot.show()
 
+    # Displays a matrix of head-to-head results between stored HOF models.
+    # Every `iterations`-th entry of self.hof is sampled (it is the range step).
+    # Cell [row][col] is the run_game result with the row model as player 1
+    # and the column model as player -1.
+    def winrate_matrix(self, iterations):
+        # Build each sampled model once up front; previously both models were
+        # re-created from their stored location for every (row, col) pair.
+        # NOTE(review): a model instance is now shared by several agents -
+        # assumes playing a game does not modify the model; confirm.
+        models = [Model("{}/{}".format(self.folder, self.hof[ind]))
+                  for ind in range(0, self.pop_size, iterations)]
+
+        matrix = [[self.run_game(Agent(model_i, 1), Agent(model_j, -1))
+                   for model_j in models]
+                  for model_i in models]
+        pyplot.imshow(matrix, cmap="bwr")
+
+    # Plays one game between the two agents on a fresh "multiplanar-2" board
+    # and returns board.who_won() once the game is no longer ongoing.
+    def run_game(self, agent1, agent2):
+        board = Board(*self.mnk, form="multiplanar-2", hist_length=-1)
+
+        while board.game_ongoing():
+            # The agent whose player id matches the side to move acts next
+            mover = agent1 if board.player == agent1.player else agent2
+            mover.action(board)
+
+        return board.who_won()
+
+
+
+
+
+
diff --git a/mnk.py b/mnk.py
@@ -93,6 +93,16 @@ def get_board(self):
                     elif self.board[i][j] == -1:
                         board_planes[i][j][1] = 1
             return np.copy(board_planes.reshape(1, self.m, self.n, 2))
+        elif self.form == "multiplanar-2":
+            board_planes = np.zeros((self.m, self.n, 2), dtype=int)
+            for i in range(self.m):
+                for j in range(self.n):
+                    if self.board[i][j] == self.player:
+                        board_planes[i][j][0] = 1
+                    elif self.board[i][j] == -1*self.player:
+                        board_planes[i][j][1] = 1
+            return np.copy(board_planes.reshape(1, self.m, self.n, 2))
+
 
     def game_ongoing(self):
         return not ( self.player_has_lost() or (self.num_legal_moves() == 0) )

diff --git a/model.py b/model.py
@@ -2,7 +2,7 @@
 import tensorflow as tf
 from keras.models import Sequential
 from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
-from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.optimizers import SGD
 
 class Model:
 
@@ -13,11 +13,12 @@ def __init__(self, location=False):
             self.model = self.retrieve(location)
             return
 
-        opt = Adam(learning_rate=0.1, beta_1=0.9, beta_2=0.999)
+        opt = SGD(learning_rate=0.02, momentum=0.0)
 
         self.model = Sequential()
-        self.model.add(Dense(27, input_shape=(1, 9), kernel_initializer='normal', activation='tanh'))
-        self.model.add(Dense(27, kernel_initializer='normal', activation='tanh'))
+        self.model.add(Conv2D(48, 3, activation='relu', input_shape=(3,3,2)))
+        self.model.add(Flatten())
+        self.model.add(Dense(27, kernel_initializer='normal', activation='relu', input_shape=(1,18)))
         self.model.add(Dense(1, kernel_initializer='normal', activation='tanh'))
 
         self.model.compile(loss='mean_squared_error', optimizer=opt)
@@ -33,28 +34,51 @@ def raw_value(self, board):
         if board.who_won() != 2:
             return tf.constant(board.who_won(), dtype="float32", shape=(1, 1))
         else:
-            return self.model(board.get_board())
+            return board.player*self.model(board.get_board())
+
+    # Returns raw_value of the position reached by playing `move`.
+    # The move is applied to `board` only temporarily and is undone before
+    # returning, including when the evaluation raises.
+    def raw_action_value(self, board, move):
+        board.move(*move)
+        try:
+            return self.raw_value(board)
+        finally:
+            # Always restore the caller's board
+            board.undo_move(*move)
 
     # Changes 1 to mean the supplied player is at advantage, -1 disadvantage
-    def state_value(self, board, player):
-        return player * self.raw_value(board)
+    def state_value(self, board):
+        if board.who_won() == 0:
+            return tf.constant(0, dtype="float32", shape=(1, 1))
+        elif board.who_won() == board.player:
+            return tf.constant(1, dtype="float32", shape=(1, 1))
+        elif board.who_won() == -1*board.player:
+            return tf.constant(-1, dtype="float32", shape=(1, 1))
+        else:
+            return self.model(board.get_board())
 
     # Returns the value of taking a move from the given board state
     def action_value(self, board, move):
-        player = board.player
-
         board.move(*move)
-        val = self.state_value(board, player)
-        board.undo_move(*move)
-
-        return val
+        # NOTE(review): state_value is relative to board.player as it stands
+        # after move() - confirm callers expect that side's perspective.
+        try:
+            return self.state_value(board)
+        finally:
+            # Undo even if evaluation raises, so the caller's board is restored
+            board.undo_move(*move)
 
+    # Step-wise learning-rate schedule keyed on the epoch index.
+    # The incoming `lr` is ignored; the rate depends only on `epoch`.
+    def scheduler(self, epoch, lr):
+        # (exclusive upper epoch bound, rate) pairs, checked in order
+        tiers = ((5000, 0.02), (15000, 0.01), (25000, 0.002))
+        for bound, rate in tiers:
+            if epoch < bound:
+                return rate
+        # Final rate once every bound has been passed
+        return 0.001
+
     # Performs a temporal difference update of the model
     def td_update(self, board, greedy_move=(), terminal=False):
+        # Fits the model on board.history()[-2] with a single-sample target:
+        # state_value(board) when `terminal`, otherwise the action value of
+        # `greedy_move`. `terminal` requires a decided game and no greedy_move.
+        callback = tf.keras.callbacks.LearningRateScheduler(self.scheduler)
+        # Each fit() call runs one epoch and, by default, numbers it 0, so the
+        # scheduler would never leave its first tier. Count updates on the
+        # instance and pass the count as the epoch index instead.
+        # NOTE(review): this treats one td_update as one scheduler "epoch" -
+        # confirm that is the intended unit for the 5000/15000/25000 bounds.
+        step = getattr(self, "_td_steps", 0)
+        fit_kwargs = dict(batch_size=1, verbose=0, callbacks=[callback],
+                          initial_epoch=step, epochs=step + 1)
         if terminal:
             assert board.who_won() != 2
             assert greedy_move == ()
-            self.model.fit(board.history()[-2], self.raw_value(board), batch_size=1, verbose=0)
+            self.model.fit(board.history()[-2], self.state_value(board), **fit_kwargs)
         else:
-            self.model.fit(board.history()[-2], self.action_value(board, greedy_move), batch_size=1, verbose=0)
+            self.model.fit(board.history()[-2], self.action_value(board, greedy_move), **fit_kwargs)
+        self._td_steps = step + 1